Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

fs/btrfs/compression.c (all lines blame to commit 8f3ce5b39, kx, 2023-10-28):
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "ordered-data.h"
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"

static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };

const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
	case BTRFS_COMPRESS_NONE:
		return btrfs_compress_types[type];
	default:
		break;
	}

	return NULL;
}

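/*
 * Check whether @str (e.g. a mount option value) names a supported
 * compression type.  Only the type name prefix is compared, so strings that
 * also carry a compression level, such as "zstd:3" or "zlib:9", are accepted
 * here; the level, if any, is parsed elsewhere.
 */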
bool btrfs_compress_is_valid_type(const char *str, size_t len)
{
	int i;

	for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
		size_t comp_len = strlen(btrfs_compress_types[i]);

		if (len < comp_len)
			continue;

		if (!strncmp(btrfs_compress_types[i], str, comp_len))
			return true;
	}
	return false;
}

static int compression_compress_pages(int type, struct list_head *ws,
               struct address_space *mapping, u64 start, struct page **pages,
               unsigned long *out_pages, unsigned long *total_in,
               unsigned long *total_out)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
	case BTRFS_COMPRESS_LZO:
		return lzo_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can happen when compression races with a remount
		 * setting it to 'no compress', while the caller doesn't call
		 * inode_need_compress() to check if we really need to
		 * compress.
		 *
		 * Not a big deal, just need to inform the caller that we
		 * haven't allocated any pages yet.
		 */
		*out_pages = 0;
		return -E2BIG;
	}
}

static int compression_decompress_bio(int type, struct list_head *ws,
		struct compressed_bio *cb)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_LZO:  return lzo_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static int compression_decompress(int type, struct list_head *ws,
               unsigned char *data_in, struct page *dest_page,
               unsigned long start_byte, size_t srclen, size_t destlen)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
	case BTRFS_COMPRESS_LZO:  return lzo_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
	case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
		 * This can't happen, the type is validated several times
		 * before we get here.
		 */
		BUG();
	}
}

static int btrfs_decompress_bio(struct compressed_bio *cb);

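/*
 * Size of a struct compressed_bio plus trailing room for one checksum per
 * on-disk sector of the compressed extent.  For example, assuming a 4K
 * sectorsize and the default crc32c checksum (csum_size == 4), a 128K
 * compressed extent needs sizeof(struct compressed_bio) + 32 * 4 bytes.
 */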
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
				      unsigned long disk_size)
{
	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

	return sizeof(struct compressed_bio) +
		(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
}

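/*
 * Verify each compressed page against the checksums stored in cb->sums at
 * submit time.  The loop below digests PAGE_SIZE bytes and advances one
 * checksum per page, i.e. it assumes sectorsize == PAGE_SIZE.  Returns 0 on
 * success or -EIO on the first mismatch, after bumping the device's
 * corruption counter when the originating device is known.
 */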
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
				 u64 disk_start)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
	struct page *page;
	unsigned long i;
	char *kaddr;
	u8 csum[BTRFS_CSUM_SIZE];
	struct compressed_bio *cb = bio->bi_private;
	u8 *cb_sum = cb->sums;

	if (inode->flags & BTRFS_INODE_NODATASUM)
		return 0;

	shash->tfm = fs_info->csum_shash;

	for (i = 0; i < cb->nr_pages; i++) {
		page = cb->compressed_pages[i];

		kaddr = kmap_atomic(page);
		crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
		kunmap_atomic(kaddr);

		if (memcmp(&csum, cb_sum, csum_size)) {
			btrfs_print_data_csum_error(inode, disk_start,
					csum, cb_sum, cb->mirror_num);
			if (btrfs_io_bio(bio)->device)
				btrfs_dev_stat_inc_and_print(
					btrfs_io_bio(bio)->device,
					BTRFS_DEV_STAT_CORRUPTION_ERRS);
			return -EIO;
		}
		cb_sum += csum_size;
	}
	return 0;
}

/* when we finish reading compressed pages from the disk, we
 * decompress them and then run the bio end_io routines on the
 * decompressed pages (in the inode address space).
 *
 * This allows the checksumming and other IO error handling routines
 * to work normally.
 *
 * The compressed pages are freed here, and it must be run
 * in process context
 */
static void end_compressed_bio_read(struct bio *bio)
{
	struct compressed_bio *cb = bio->bi_private;
	struct inode *inode;
	struct page *page;
	unsigned long index;
	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
	int ret = 0;

	if (bio->bi_status)
		cb->errors = 1;

	/* if there are more bios still pending for this compressed
	 * extent, just exit
	 */
	if (!refcount_dec_and_test(&cb->pending_bios))
		goto out;

	/*
	 * Record the correct mirror_num in cb->orig_bio so that
	 * read-repair can work properly.
	 */
	btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
	cb->mirror_num = mirror;

	/*
	 * Some IO in this cb has failed, just skip checksum as there
	 * is no way it could be correct.
	 */
	if (cb->errors == 1)
		goto csum_failed;

	inode = cb->inode;
	ret = check_compressed_csum(BTRFS_I(inode), bio,
				    (u64)bio->bi_iter.bi_sector << 9);
	if (ret)
		goto csum_failed;

	/* ok, we're the last bio for this extent, let's start
	 * the decompression.
	 */
	ret = btrfs_decompress_bio(cb);

csum_failed:
	if (ret)
		cb->errors = 1;

	/* release the compressed pages */
	index = 0;
	for (index = 0; index < cb->nr_pages; index++) {
		page = cb->compressed_pages[index];
		page->mapping = NULL;
		put_page(page);
	}

	/* do io completion on the original bio */
	if (cb->errors) {
		bio_io_error(cb->orig_bio);
	} else {
		struct bio_vec *bvec;
		struct bvec_iter_all iter_all;

		/*
		 * we have verified the checksum already, set page
		 * checked so the end_io handlers know about it
		 */
		ASSERT(!bio_flagged(bio, BIO_CLONED));
		bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
			SetPageChecked(bvec->bv_page);

		bio_endio(cb->orig_bio);
	}

	/* finally free the cb struct */
	kfree(cb->compressed_pages);
	kfree(cb);
out:
	bio_put(bio);
}

/*
 * Clear the writeback bits on all of the file
 * pages for a compressed write
 */
static noinline void end_compressed_writeback(struct inode *inode,
					      const struct compressed_bio *cb)
{
	unsigned long index = cb->start >> PAGE_SHIFT;
	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
	struct page *pages[16];
	unsigned long nr_pages = end_index - index + 1;
	int i;
	int ret;

	if (cb->errors)
		mapping_set_error(inode->i_mapping, -EIO);

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
				     nr_pages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			nr_pages -= 1;
			index += 1;
			continue;
		}
		for (i = 0; i < ret; i++) {
			if (cb->errors)
				SetPageError(pages[i]);
			end_page_writeback(pages[i]);
			put_page(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
	}
	/* the inode may be gone now */
}

/*
 * do the cleanup once all the compressed pages hit the disk.
 * This will clear writeback on the file pages and free the compressed
 * pages.
 *
 * This also calls the writeback end hooks for the file pages so that
 * metadata and checksums can be updated in the file.
 */
static void end_compressed_bio_write(struct bio *bio)
{
	struct compressed_bio *cb = bio->bi_private;
	struct inode *inode;
	struct page *page;
	unsigned long index;

	if (bio->bi_status)
		cb->errors = 1;

	/* if there are more bios still pending for this compressed
	 * extent, just exit
	 */
	if (!refcount_dec_and_test(&cb->pending_bios))
		goto out;

	/* ok, we're the last bio for this extent, step one is to
	 * call back into the FS and do all the end_io operations
	 */
	inode = cb->inode;
	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
			cb->start, cb->start + cb->len - 1,
			!cb->errors);
	cb->compressed_pages[0]->mapping = NULL;

	end_compressed_writeback(inode, cb);
	/* note, our inode could be gone now */

	/*
	 * release the compressed pages, these came from alloc_page and
	 * are not attached to the inode at all
	 */
	index = 0;
	for (index = 0; index < cb->nr_pages; index++) {
		page = cb->compressed_pages[index];
		page->mapping = NULL;
		put_page(page);
	}

	/* finally free the cb struct */
	kfree(cb->compressed_pages);
	kfree(cb);
out:
	bio_put(bio);
}

/*
 * worker function to build and submit bios for previously compressed pages.
 * The corresponding pages in the inode should be marked for writeback
 * and the compressed pages should have a reference on them for dropping
 * when the IO is complete.
 *
 * This also checksums the file bytes and gets things ready for
 * the end io hooks.
 */
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
				 unsigned long len, u64 disk_start,
				 unsigned long compressed_len,
				 struct page **compressed_pages,
				 unsigned long nr_pages,
				 unsigned int write_flags,
				 struct cgroup_subsys_state *blkcg_css)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct bio *bio = NULL;
	struct compressed_bio *cb;
	unsigned long bytes_left;
	int pg_index = 0;
	struct page *page;
	u64 first_byte = disk_start;
	blk_status_t ret;
	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;

	WARN_ON(!PAGE_ALIGNED(start));
	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
	if (!cb)
		return BLK_STS_RESOURCE;
	refcount_set(&cb->pending_bios, 0);
	cb->errors = 0;
	cb->inode = &inode->vfs_inode;
	cb->start = start;
	cb->len = len;
	cb->mirror_num = 0;
	cb->compressed_pages = compressed_pages;
	cb->compressed_len = compressed_len;
	cb->orig_bio = NULL;
	cb->nr_pages = nr_pages;

	bio = btrfs_bio_alloc(first_byte);
	bio->bi_opf = REQ_OP_WRITE | write_flags;
	bio->bi_private = cb;
	bio->bi_end_io = end_compressed_bio_write;

	if (blkcg_css) {
		bio->bi_opf |= REQ_CGROUP_PUNT;
		kthread_associate_blkcg(blkcg_css);
	}
	refcount_set(&cb->pending_bios, 1);

	/* create and submit bios for the compressed pages */
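	/*
	 * Each loop iteration adds one compressed page to the current bio.
	 * When the next page would cross a stripe/chunk boundary (reported by
	 * btrfs_bio_fits_in_stripe()) or no longer fits in the bio, the
	 * current bio is checksummed (unless NODATASUM) and submitted, and a
	 * fresh bio is started for this page at the current disk byte.
	 */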
	bytes_left = compressed_len;
	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
		int submit = 0;

		page = compressed_pages[pg_index];
		page->mapping = inode->vfs_inode.i_mapping;
		if (bio->bi_iter.bi_size)
			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
							  0);

		page->mapping = NULL;
		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
		    PAGE_SIZE) {
			/*
			 * inc the count before we submit the bio so
			 * we know the end IO handler won't happen before
			 * we inc the count.  Otherwise, the cb might get
			 * freed before we're done setting it up
			 */
			refcount_inc(&cb->pending_bios);
			ret = btrfs_bio_wq_end_io(fs_info, bio,
						  BTRFS_WQ_ENDIO_DATA);
			BUG_ON(ret); /* -ENOMEM */

			if (!skip_sum) {
				ret = btrfs_csum_one_bio(inode, bio, start, 1);
				BUG_ON(ret); /* -ENOMEM */
			}

			ret = btrfs_map_bio(fs_info, bio, 0);
			if (ret) {
				bio->bi_status = ret;
				bio_endio(bio);
			}

			bio = btrfs_bio_alloc(first_byte);
			bio->bi_opf = REQ_OP_WRITE | write_flags;
			bio->bi_private = cb;
			bio->bi_end_io = end_compressed_bio_write;
			if (blkcg_css)
				bio->bi_opf |= REQ_CGROUP_PUNT;
			bio_add_page(bio, page, PAGE_SIZE, 0);
		}
		if (bytes_left < PAGE_SIZE) {
			btrfs_info(fs_info,
					"bytes left %lu compress len %lu nr %lu",
			       bytes_left, cb->compressed_len, cb->nr_pages);
		}
		bytes_left -= PAGE_SIZE;
		first_byte += PAGE_SIZE;
		cond_resched();
	}

	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
	BUG_ON(ret); /* -ENOMEM */

	if (!skip_sum) {
		ret = btrfs_csum_one_bio(inode, bio, start, 1);
		BUG_ON(ret); /* -ENOMEM */
	}

	ret = btrfs_map_bio(fs_info, bio, 0);
	if (ret) {
		bio->bi_status = ret;
		bio_endio(bio);
	}

	if (blkcg_css)
		kthread_associate_blkcg(NULL);

	return 0;
}

static u64 bio_end_offset(struct bio *bio)
{
	struct bio_vec *last = bio_last_bvec_all(bio);

	return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
}

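/*
 * Opportunistically add pages for the rest of the compressed extent (up to
 * @compressed_end) to cb->orig_bio, so that a single decompression pass can
 * fill as much of the file range covered by this extent as possible.  Pages
 * already present in the page cache are skipped (and after a few of them we
 * give up), and the expansion stops at the first page that no longer maps to
 * this compressed extent on disk.
 */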
static noinline int add_ra_bio_pages(struct inode *inode,
				     u64 compressed_end,
				     struct compressed_bio *cb)
{
	unsigned long end_index;
	unsigned long pg_index;
	u64 last_offset;
	u64 isize = i_size_read(inode);
	int ret;
	struct page *page;
	unsigned long nr_pages = 0;
	struct extent_map *em;
	struct address_space *mapping = inode->i_mapping;
	struct extent_map_tree *em_tree;
	struct extent_io_tree *tree;
	u64 end;
	int misses = 0;

	last_offset = bio_end_offset(cb->orig_bio);
	em_tree = &BTRFS_I(inode)->extent_tree;
	tree = &BTRFS_I(inode)->io_tree;

	if (isize == 0)
		return 0;

	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	while (last_offset < compressed_end) {
		pg_index = last_offset >> PAGE_SHIFT;

		if (pg_index > end_index)
			break;

		page = xa_load(&mapping->i_pages, pg_index);
		if (page && !xa_is_value(page)) {
			misses++;
			if (misses > 4)
				break;
			goto next;
		}

		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
								 ~__GFP_FS));
		if (!page)
			break;

		if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
			put_page(page);
			goto next;
		}

		end = last_offset + PAGE_SIZE - 1;
		/*
		 * at this point, we have a locked page in the page cache
		 * for these bytes in the file.  But, we have to make
		 * sure they map to this compressed extent on disk.
		 */
		set_page_extent_mapped(page);
		lock_extent(tree, last_offset, end);
		read_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, last_offset,
					   PAGE_SIZE);
		read_unlock(&em_tree->lock);

		if (!em || last_offset < em->start ||
		    (last_offset + PAGE_SIZE > extent_map_end(em)) ||
		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
			free_extent_map(em);
			unlock_extent(tree, last_offset, end);
			unlock_page(page);
			put_page(page);
			break;
		}
		free_extent_map(em);

		if (page->index == end_index) {
			char *userpage;
			size_t zero_offset = offset_in_page(isize);

			if (zero_offset) {
				int zeros;
				zeros = PAGE_SIZE - zero_offset;
				userpage = kmap_atomic(page);
				memset(userpage + zero_offset, 0, zeros);
				flush_dcache_page(page);
				kunmap_atomic(userpage);
			}
		}

		ret = bio_add_page(cb->orig_bio, page,
				   PAGE_SIZE, 0);

		if (ret == PAGE_SIZE) {
			nr_pages++;
			put_page(page);
		} else {
			unlock_extent(tree, last_offset, end);
			unlock_page(page);
			put_page(page);
			break;
		}
next:
		last_offset += PAGE_SIZE;
	}
	return 0;
}

/*
 * for a compressed read, the bio we get passed has all the inode pages
 * in it.  We don't actually do IO on those pages but allocate new ones
 * to hold the compressed pages on disk.
 *
 * bio->bi_iter.bi_sector points to the compressed extent on disk
 * bio->bi_io_vec points to all of the inode pages
 *
 * After the compressed pages are read, we copy the bytes into the
 * bio we were passed and then run the bio end_io handlers
 */
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
				 int mirror_num, unsigned long bio_flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree;
	struct compressed_bio *cb;
	unsigned long compressed_len;
	unsigned long nr_pages;
	unsigned long pg_index;
	struct page *page;
	struct bio *comp_bio;
	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
	u64 em_len;
	u64 em_start;
	struct extent_map *em;
	blk_status_t ret = BLK_STS_RESOURCE;
	int faili = 0;
	const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
	u8 *sums;

	em_tree = &BTRFS_I(inode)->extent_tree;

	/* we need the actual starting offset of this extent in the file */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree,
				   page_offset(bio_first_page_all(bio)),
				   PAGE_SIZE);
	read_unlock(&em_tree->lock);
	if (!em)
		return BLK_STS_IOERR;

	compressed_len = em->block_len;
	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
	if (!cb)
		goto out;

	refcount_set(&cb->pending_bios, 0);
	cb->errors = 0;
	cb->inode = inode;
	cb->mirror_num = mirror_num;
	sums = cb->sums;

	cb->start = em->orig_start;
	em_len = em->len;
	em_start = em->start;

	free_extent_map(em);
	em = NULL;

	cb->len = bio->bi_iter.bi_size;
	cb->compressed_len = compressed_len;
	cb->compress_type = extent_compress_type(bio_flags);
	cb->orig_bio = bio;

	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
	cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
				       GFP_NOFS);
	if (!cb->compressed_pages)
		goto fail1;

	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
		cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
							      __GFP_HIGHMEM);
		if (!cb->compressed_pages[pg_index]) {
			faili = pg_index - 1;
			ret = BLK_STS_RESOURCE;
			goto fail2;
		}
	}
	faili = nr_pages - 1;
	cb->nr_pages = nr_pages;

	add_ra_bio_pages(inode, em_start + em_len, cb);

	/* include any pages we added in add_ra_bio_pages */
	cb->len = bio->bi_iter.bi_size;

	comp_bio = btrfs_bio_alloc(cur_disk_byte);
	comp_bio->bi_opf = REQ_OP_READ;
	comp_bio->bi_private = cb;
	comp_bio->bi_end_io = end_compressed_bio_read;
	refcount_set(&cb->pending_bios, 1);

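	/*
	 * Build and submit read bios for the freshly allocated compressed
	 * pages, splitting at stripe boundaries just like the write path.
	 * Before each submit, the expected data checksums for the chunk are
	 * looked up (unless NODATASUM) and stored after the compressed_bio,
	 * where end_compressed_bio_read() will verify them.
	 */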
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	for (pg_index = 0; pg_index < nr_pages; pg_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 		int submit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 		page = cb->compressed_pages[pg_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 		page->mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 		page->index = em_start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		if (comp_bio->bi_iter.bi_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 							  comp_bio, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 		page->mapping = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		    PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 			unsigned int nr_sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 						  BTRFS_WQ_ENDIO_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 			BUG_ON(ret); /* -ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 			 * inc the count before we submit the bio so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 			 * we know the end IO handler won't happen before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 			 * we inc the count.  Otherwise, the cb might get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 			 * freed before we're done setting it up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 			refcount_inc(&cb->pending_bios);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 				ret = btrfs_lookup_bio_sums(inode, comp_bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 							    (u64)-1, sums);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 				BUG_ON(ret); /* -ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 			nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 						  fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 			sums += csum_size * nr_sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 			ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 			if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 				comp_bio->bi_status = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 				bio_endio(comp_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 			comp_bio = btrfs_bio_alloc(cur_disk_byte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 			comp_bio->bi_opf = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 			comp_bio->bi_private = cb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 			comp_bio->bi_end_io = end_compressed_bio_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 			bio_add_page(comp_bio, page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 		cur_disk_byte += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	BUG_ON(ret); /* -ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 		ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		BUG_ON(ret); /* -ENOMEM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		comp_bio->bi_status = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 		bio_endio(comp_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) fail2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	while (faili >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		__free_page(cb->compressed_pages[faili]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 		faili--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	kfree(cb->compressed_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) fail1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	kfree(cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787)  * The heuristic uses systematic sampling to collect data from the input data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788)  * range; the logic can be tuned by the following constants:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790)  * @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791)  * @SAMPLING_INTERVAL  - distance between the starts of consecutive samples
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) #define SAMPLING_READ_SIZE	(16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) #define SAMPLING_INTERVAL	(256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797)  * For statistical analysis of the input data we track the 256 possible byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798)  * values. Each value has an associated count, i.e. how many times that value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799)  * appeared in the sample.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) #define BUCKET_SIZE		(256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804)  * The size of the sample is based on a statistical sampling rule of thumb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805)  * The common way is to perform sampling tests as long as the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806)  * elements in each cell is at least 5.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)  * Instead of 5, we choose 32 to obtain more accurate results.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809)  * If the data contain the maximum number of symbols, which is 256, we obtain a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810)  * sample size bounded by 8192.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  * For a sample of at most 8KB of data per data range: 16 consecutive bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  * from up to 512 locations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) #define MAX_SAMPLE_SIZE		(BTRFS_MAX_UNCOMPRESSED *		\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 				 SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
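/*
 * A worked example with the constants above: BTRFS_MAX_UNCOMPRESSED is 128KiB,
 * so 131072 / 256 = 512 sample positions and 512 * 16 = 8192 bytes copied,
 * i.e. the 8KiB bound mentioned above, with only about 1/16th of the range
 * actually being read.
 */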
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) struct bucket_item {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	u32 count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) struct heuristic_ws {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	/* Partial copy of input data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	u8 *sample;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	u32 sample_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	/* Buckets store counters for each byte value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	struct bucket_item *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	/* Sorting buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	struct bucket_item *bucket_b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) static struct workspace_manager heuristic_wsm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) static void free_heuristic_ws(struct list_head *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	struct heuristic_ws *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	workspace = list_entry(ws, struct heuristic_ws, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	kvfree(workspace->sample);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	kfree(workspace->bucket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	kfree(workspace->bucket_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	kfree(workspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) static struct list_head *alloc_heuristic_ws(unsigned int level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	struct heuristic_ws *ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	if (!ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	if (!ws->sample)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	if (!ws->bucket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	if (!ws->bucket_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	INIT_LIST_HEAD(&ws->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	return &ws->list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	free_heuristic_ws(&ws->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) const struct btrfs_compress_op btrfs_heuristic_compress = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	.workspace_manager = &heuristic_wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) static const struct btrfs_compress_op * const btrfs_compress_op[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	/* The heuristic is represented as compression type 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	&btrfs_heuristic_compress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	&btrfs_zlib_compress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	&btrfs_lzo_compress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	&btrfs_zstd_compress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) static struct list_head *alloc_workspace(int type, unsigned int level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	case BTRFS_COMPRESS_LZO:  return lzo_alloc_workspace(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		 * This can't happen, the type is validated several times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		 * before we get here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) static void free_workspace(int type, struct list_head *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	case BTRFS_COMPRESS_LZO:  return lzo_free_workspace(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		 * This can't happen, the type is validated several times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		 * before we get here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) static void btrfs_init_workspace_manager(int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	struct workspace_manager *wsm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	struct list_head *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	wsm = btrfs_compress_op[type]->workspace_manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	INIT_LIST_HEAD(&wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	spin_lock_init(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	atomic_set(&wsm->total_ws, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	init_waitqueue_head(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	 * Preallocate one workspace for each compression type so we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	 * guarantee forward progress in the worst case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	workspace = alloc_workspace(type, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	if (IS_ERR(workspace)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		pr_warn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	"BTRFS: cannot preallocate compression workspace, will try later\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		atomic_set(&wsm->total_ws, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 		wsm->free_ws = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 		list_add(workspace, &wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) static void btrfs_cleanup_workspace_manager(int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	struct workspace_manager *wsman;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	struct list_head *ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	wsman = btrfs_compress_op[type]->workspace_manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	while (!list_empty(&wsman->idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 		ws = wsman->idle_ws.next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		list_del(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		free_workspace(type, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		atomic_dec(&wsman->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959)  * This finds an available workspace or allocates a new one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960)  * If it's not possible to allocate a new one, this waits until one is freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961)  * Preallocation provides a forward progress guarantee and we do not return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962)  * errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) struct list_head *btrfs_get_workspace(int type, unsigned int level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	struct workspace_manager *wsm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	struct list_head *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	int cpus = num_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	unsigned nofs_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	struct list_head *idle_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	spinlock_t *ws_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	atomic_t *total_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	wait_queue_head_t *ws_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	int *free_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	wsm = btrfs_compress_op[type]->workspace_manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	idle_ws	 = &wsm->idle_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	ws_lock	 = &wsm->ws_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	total_ws = &wsm->total_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	ws_wait	 = &wsm->ws_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	free_ws	 = &wsm->free_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	spin_lock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	if (!list_empty(idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		workspace = idle_ws->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		list_del(workspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		(*free_ws)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		spin_unlock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		return workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	if (atomic_read(total_ws) > cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		spin_unlock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		if (atomic_read(total_ws) > cpus && !*free_ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 			schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 		finish_wait(ws_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	atomic_inc(total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	spin_unlock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	 * Allocation helpers call vmalloc, which cannot be passed GFP_NOFS, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	 * enter a scoped NOFS context here instead, because we might get called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	 * from the restricted context of btrfs_compress_bio/btrfs_compress_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	nofs_flag = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	workspace = alloc_workspace(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	if (IS_ERR(workspace)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 		atomic_dec(total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		wake_up(ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 		 * Do not return the error but go back to waiting. There's a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		 * workspace preallocated for each type and the compression
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		 * time is bounded so we get to a workspace eventually. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		 * makes our caller's life easier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		 * To prevent silent and low-probability deadlocks (when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		 * initial preallocation fails), check if there are any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		 * workspaces at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 		if (atomic_read(total_ws) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 			static DEFINE_RATELIMIT_STATE(_rs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 					/* once per minute */ 60 * HZ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 					/* no burst */ 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 			if (__ratelimit(&_rs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 				pr_warn("BTRFS: no compression workspaces, low memory, retrying\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	return workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) static struct list_head *get_workspace(int type, int level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	case BTRFS_COMPRESS_LZO:  return btrfs_get_workspace(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		 * This can't happen, the type is validated several times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		 * before we get here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
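/*
 * Note: every workspace obtained through get_workspace() must be handed back
 * with put_workspace() for the same type once the (de)compression call is
 * finished, as btrfs_compress_pages(), btrfs_decompress_bio() and
 * btrfs_decompress() below do.
 */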
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)  * put a workspace struct back on the list or free it if we have enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)  * idle ones sitting around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) void btrfs_put_workspace(int type, struct list_head *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	struct workspace_manager *wsm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	struct list_head *idle_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	spinlock_t *ws_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	atomic_t *total_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	wait_queue_head_t *ws_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	int *free_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	wsm = btrfs_compress_op[type]->workspace_manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	idle_ws	 = &wsm->idle_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	ws_lock	 = &wsm->ws_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	total_ws = &wsm->total_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	ws_wait	 = &wsm->ws_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	free_ws	 = &wsm->free_ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	spin_lock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	if (*free_ws <= num_online_cpus()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		list_add(ws, idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 		(*free_ws)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		spin_unlock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		goto wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	spin_unlock(ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	free_workspace(type, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	atomic_dec(total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) wake:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	cond_wake_up(ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) static void put_workspace(int type, struct list_head *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	case BTRFS_COMPRESS_LZO:  return btrfs_put_workspace(type, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 		 * This can't happen, the type is validated several times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		 * before we get here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)  * Adjust @level according to the limits of the compression algorithm or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)  * fall back to the default.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) static unsigned int btrfs_compress_set_level(int type, unsigned level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	const struct btrfs_compress_op *ops = btrfs_compress_op[type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	if (level == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 		level = ops->default_level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 		level = min(level, ops->max_level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	return level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
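/*
 * For example, assuming zlib's usual limits in this tree (default_level 3,
 * max_level 9), a requested level of 0 becomes 3 and a requested level of 12
 * is clamped to 9.
 */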
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)  * Given an address space and start and length, compress the bytes into @pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)  * that are allocated on demand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)  * @type_level encodes both the algorithm and the level, where level 0 means
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)  * whatever default the algorithm chooses and is opaque here;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)  * - the compression algorithm is stored in bits 0-3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)  * - the level is stored in bits 4-7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)  * @out_pages is an in/out parameter, holds maximum number of pages to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)  * and returns number of actually allocated pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)  * @total_in is used to return the number of bytes actually read.  It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)  * may be smaller than the input length if we had to exit early because we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)  * ran out of room in the pages array or because we crossed the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)  * max_out threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)  * @total_out is an in/out parameter, must be set to the input length and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)  * be also used to return the total number of compressed bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)  * @max_out tells us the max number of bytes that we're allowed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)  * stuff into pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)  */
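/*
 * For example, a type_level of 0x31 selects zlib (type 1) at level 3, and
 * 0x03 selects zstd (type 3) at its default level.
 */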
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 			 u64 start, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 			 unsigned long *out_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 			 unsigned long *total_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 			 unsigned long *total_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	int type = btrfs_compress_type(type_level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	int level = btrfs_compress_level(type_level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	struct list_head *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	level = btrfs_compress_set_level(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	workspace = get_workspace(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	ret = compression_compress_pages(type, workspace, mapping, start, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 					 out_pages, total_in, total_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	put_workspace(type, workspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)  * pages_in is an array of pages with compressed data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)  * disk_start is the starting logical offset of this array in the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)  * orig_bio contains the pages from the file that we want to decompress into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)  * srclen is the number of bytes in pages_in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)  * The basic idea is that we have a bio that was created by readpages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)  * The pages in the bio are for the uncompressed data, and they may not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)  * be contiguous.  They all correspond to the range of bytes covered by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)  * the compressed extent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) static int btrfs_decompress_bio(struct compressed_bio *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	struct list_head *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	int type = cb->compress_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	workspace = get_workspace(type, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	ret = compression_decompress_bio(type, workspace, cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	put_workspace(type, workspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)  * a less complex decompression routine.  Our compressed data fits in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)  * single page, and we want to read a single page out of it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)  * start_byte tells us the offset into the uncompressed data we're interested in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 		     unsigned long start_byte, size_t srclen, size_t destlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	struct list_head *workspace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	workspace = get_workspace(type, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	ret = compression_decompress(type, workspace, data_in, dest_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 				     start_byte, srclen, destlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	put_workspace(type, workspace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) void __init btrfs_init_compress(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	zstd_init_workspace_manager();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
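/*
 * BTRFS_COMPRESS_NONE stands in for the heuristic workspaces here (see the
 * btrfs_compress_op[] table above), so they are set up and torn down together
 * with the real algorithms.
 */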
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) void __cold btrfs_exit_compress(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	zstd_cleanup_workspace_manager();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)  * Copy uncompressed data from working buffer to pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)  * buf_start is the offset in the uncompressed data at which our working buffer starts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235)  * total_out is the total number of uncompressed bytes produced so far (one past the last byte in the buffer).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			      unsigned long total_out, u64 disk_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 			      struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	unsigned long buf_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	unsigned long current_buf_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	unsigned long start_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	unsigned long prev_start_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	unsigned long working_bytes = total_out - buf_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	unsigned long bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	char *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	 * start byte is the first byte of the page we're currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	 * copying into relative to the start of the compressed data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	start_byte = page_offset(bvec.bv_page) - disk_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	/* we haven't yet hit data corresponding to this page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	if (total_out <= start_byte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	 * the start of the data we care about is offset into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	 * the middle of our working buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	if (total_out > start_byte && buf_start < start_byte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		buf_offset = start_byte - buf_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		working_bytes -= buf_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		buf_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	current_buf_start = buf_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	/* copy bytes from the working buffer into the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	while (working_bytes > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		bytes = min_t(unsigned long, bvec.bv_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 				PAGE_SIZE - (buf_offset % PAGE_SIZE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 		bytes = min(bytes, working_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		kaddr = kmap_atomic(bvec.bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 		memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		kunmap_atomic(kaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 		flush_dcache_page(bvec.bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		buf_offset += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		working_bytes -= bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		current_buf_start += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 		/* check if we need to pick another page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 		bio_advance(bio, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		if (!bio->bi_iter.bi_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		bvec = bio_iter_iovec(bio, bio->bi_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		prev_start_byte = start_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		start_byte = page_offset(bvec.bv_page) - disk_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		 * We need to make sure we're only adjusting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 		 * our offset into the compression working buffer when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		 * we're switching pages.  Otherwise we can incorrectly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		 * keep copying when we were actually done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		if (start_byte != prev_start_byte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 			 * make sure our new page is covered by this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 			 * working buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 			if (total_out <= start_byte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 				return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 			 * the next page in the biovec might not be adjacent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 			 * to the last page, but it might still be found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 			 * inside this working buffer. bump our offset pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 			if (total_out > start_byte &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 			    current_buf_start < start_byte) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 				buf_offset = start_byte - buf_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 				working_bytes = total_out - start_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 				current_buf_start = buf_start + buf_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) }
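/*
 * Illustration of the contract above: if a decompression pass has produced
 * uncompressed bytes [0, 4096) (buf_start = 0, total_out = 4096) and the
 * current bio page maps to uncompressed offset 8192, this returns 1 so the
 * caller keeps decompressing; 0 is only returned once the bio has been
 * completely filled.
 */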
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)  * Shannon Entropy calculation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)  * Pure byte distribution analysis fails to determine compressibility of data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)  * Try calculating entropy to estimate the average minimum number of bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)  * needed to encode the sampled data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)  * For convenience, return the percentage of needed bits instead of the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)  * of bits directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)  * @ENTROPY_LVL_ACEPTABLE - below this threshold the sample has low byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)  *			    entropy and is compressible with high probability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)  * @ENTROPY_LVL_HIGH - data are not compressible with high probability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)  * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) #define ENTROPY_LVL_ACEPTABLE		(65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) #define ENTROPY_LVL_HIGH		(80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)  * For increased precision in the shannon_entropy() calculation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)  * compute ilog2 of pow(n, M) to keep more digits after the decimal point:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)  * - maximum int bit length is 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)  * - ilog2(MAX_SAMPLE_SIZE)	-> 13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)  * - 13 * 4 = 52 < 64		-> M = 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)  * So use pow(n, 4).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) static inline u32 ilog2_w(u64 n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	return ilog2(n * n * n * n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
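/*
 * In textbook form the entropy is H = -sum(p_i * log2(p_i)) with p_i the
 * frequency of byte value i in the sample.  shannon_entropy() computes the
 * equivalent sum(count_i * (log2(sample_size) - log2(count_i))) / sample_size
 * using the scaled ilog2_w() helper (its factor of 4 cancels against
 * entropy_max) and reports the result as a percentage of the 8-bit maximum.
 */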
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) static u32 shannon_entropy(struct heuristic_ws *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	const u32 entropy_max = 8 * ilog2_w(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	u32 entropy_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	u32 p, p_base, sz_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	sz_base = ilog2_w(ws->sample_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		p = ws->bucket[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 		p_base = ilog2_w(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 		entropy_sum += p * (sz_base - p_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	entropy_sum /= ws->sample_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	return entropy_sum * 100 / entropy_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) #define RADIX_BASE		4U
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) #define COUNTERS_SIZE		(1U << RADIX_BASE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) static u8 get4bits(u64 num, int shift) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	u8 low4bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	num >>= shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	/* Reverse order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	return low4bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
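/*
 * Because of the (COUNTERS_SIZE - 1) - ... mapping above, a nibble of 0 is
 * counted in slot 15 and a nibble of 15 in slot 0, so the counting passes in
 * radix_sort() below order the buckets by descending count.
 */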
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)  * Use 4 bits as radix base
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)  * Use 16 u32 counters for calculating new position in buf array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)  * @array     - array that will be sorted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)  * @array_buf - buffer array to store sorting results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)  *              must be equal in size to @array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398)  * @num       - array size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 		       int num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	u64 max_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	u64 buf_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	u32 counters[COUNTERS_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	u32 new_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	u32 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	int bitlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	int shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	 * Try to avoid useless loop iterations for small numbers stored in big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	 * counters.  Example: 48 33 4 ... in a 64-bit array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	max_num = array[0].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	for (i = 1; i < num; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 		buf_num = array[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		if (buf_num > max_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 			max_num = buf_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	buf_num = ilog2(max_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	bitlen = ALIGN(buf_num, RADIX_BASE * 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 	shift = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 	while (shift < bitlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		memset(counters, 0, sizeof(counters));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 		for (i = 0; i < num; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 			buf_num = array[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 			addr = get4bits(buf_num, shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 			counters[addr]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		for (i = 1; i < COUNTERS_SIZE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 			counters[i] += counters[i - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 		for (i = num - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 			buf_num = array[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 			addr = get4bits(buf_num, shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 			counters[addr]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 			new_addr = counters[addr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 			array_buf[new_addr] = array[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 		shift += RADIX_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		 * A normal radix sort would copy the data from the temporary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		 * array back to the main one, which costs CPU time.  Avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 		 * that by doing another sort iteration from the temporary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 		 * array back into the original array instead of a memcpy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 		memset(counters, 0, sizeof(counters));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 		for (i = 0; i < num; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 			buf_num = array_buf[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 			addr = get4bits(buf_num, shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 			counters[addr]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		for (i = 1; i < COUNTERS_SIZE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 			counters[i] += counters[i - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		for (i = num - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 			buf_num = array_buf[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 			addr = get4bits(buf_num, shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 			counters[addr]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 			new_addr = counters[addr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 			array[new_addr] = array_buf[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 		shift += RADIX_BASE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479)  * Size of the core byte set - how many bytes cover 90% of the sample
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)  * There are several types of structured binary data that use nearly all byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)  * values. The distribution can be uniform and the counts in all buckets will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)  * be nearly the same (e.g. encrypted data); such data is unlikely to be compressible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)  * Another possibility is a normal (Gaussian) distribution, where the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)  * could be compressible, but we have to take a few more steps to decide how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)  * much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)  * @BYTE_CORE_SET_LOW  - most byte values repeat frequently, a compression
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)  *                       algorithm can easily exploit that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)  * @BYTE_CORE_SET_HIGH - the data have a uniform distribution and are with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)  *                       high probability not compressible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) #define BYTE_CORE_SET_LOW		(64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) #define BYTE_CORE_SET_HIGH		(200)
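/*
 * In other words: if the 64 most frequent byte values (25% of all possible
 * values) already cover 90% of the sample, the data is considered easily
 * compressible; if more than 200 values (~78%) are needed to reach 90%, the
 * distribution is close to uniform and the data is likely incompressible.
 */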
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) static int byte_core_set_size(struct heuristic_ws *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	u32 coreset_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	const u32 core_set_threshold = ws->sample_size * 90 / 100;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	struct bucket_item *bucket = ws->bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	/* Sort in reverse order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 		coreset_sum += bucket[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	if (coreset_sum > core_set_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 		return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 		coreset_sum += bucket[i].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 		if (coreset_sum > core_set_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523)  * Count byte values in buckets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)  * This heuristic can detect textual data (configs, xml, json, html, etc),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525)  * because in most text-like data the byte set is restricted to a limited
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)  * number of possible characters, and that restriction in most cases makes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)  * the data easy to compress.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529)  * @BYTE_SET_THRESHOLD - compare the byte set size against this threshold:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)  *	less - compressible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531)  *	more - needs additional analysis
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) #define BYTE_SET_THRESHOLD		(64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) static u32 byte_set_size(const struct heuristic_ws *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 	u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	u32 byte_set_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 	for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		if (ws->bucket[i].count > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			byte_set_size++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	 * Continue counting how many distinct byte values are used.  If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 	 * byte set size is bigger than the threshold, it's pointless to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	 * continue, as the detection technique would fail for this type of data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 	for (; i < BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		if (ws->bucket[i].count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 			byte_set_size++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 			if (byte_set_size > BYTE_SET_THRESHOLD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 				return byte_set_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	return byte_set_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 
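/*
 * Cheap check for trivially repetitive data: if the second half of the
 * sample is byte-for-byte identical to the first half (e.g. zero-filled or
 * otherwise periodic data), it is safe to assume the data compresses well.
 */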
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) static bool sample_repeated_patterns(struct heuristic_ws *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	const u32 half_of_sample = ws->sample_size / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 	const u8 *data = ws->sample;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 	return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 				     struct heuristic_ws *ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 	u64 index, index_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	u32 i, curr_sample_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	u8 *in_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	 * Compression handles the input data by chunks of 128KiB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	 * (defined by BTRFS_MAX_UNCOMPRESSED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	 * We do the same for the heuristic and loop over the whole range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	 * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	 * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	if (end - start > BTRFS_MAX_UNCOMPRESSED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 		end = start + BTRFS_MAX_UNCOMPRESSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	index = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	index_end = end >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	/* Don't miss unaligned end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	if (!IS_ALIGNED(end, PAGE_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		index_end++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	curr_sample_pos = 0;
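	/*
	 * Walk the pages of the (possibly clamped) range and copy
	 * SAMPLING_READ_SIZE bytes every SAMPLING_INTERVAL bytes into
	 * ws->sample, so only a small, evenly spread fraction of the data
	 * gets examined.
	 */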
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	while (index < index_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		page = find_get_page(inode->i_mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 		in_data = kmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 		/* Handle case where the start is not aligned to PAGE_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 		i = start % PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 		while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 			/* Don't sample any garbage from the last page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 			if (start > end - SAMPLING_READ_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 			memcpy(&ws->sample[curr_sample_pos], &in_data[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 					SAMPLING_READ_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 			i += SAMPLING_INTERVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 			start += SAMPLING_INTERVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 			curr_sample_pos += SAMPLING_READ_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 		kunmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 		index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	ws->sample_size = curr_sample_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)  * Compression heuristic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)  * For now it's a naive and optimistic 'return true'; we'll extend the logic to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)  * quickly (compared to direct compression) detect data characteristics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)  * (compressible/incompressible) to avoid wasting CPU time on incompressible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)  * data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)  * The following types of analysis can be performed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)  * - detect mostly zero data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)  * - detect data with low "byte set" size (text, etc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)  * - detect data with low/high "core byte" set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)  * Return non-zero if the compression should be done, 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)  */
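/*
 * Note: the distinct non-zero values assigned to ret below only record which
 * detector made the decision (repeated pattern, small byte set, small core
 * byte set, acceptable entropy); callers are expected to treat any non-zero
 * return as "compression is worthwhile".
 */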
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	struct list_head *ws_list = get_workspace(0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	struct heuristic_ws *ws;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	u8 byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	ws = list_entry(ws_list, struct heuristic_ws, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	heuristic_collect_sample(inode, start, end, ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 	if (sample_repeated_patterns(ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 	memset(ws->bucket, 0, sizeof(*ws->bucket) * BUCKET_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	for (i = 0; i < ws->sample_size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		byte = ws->sample[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 		ws->bucket[byte].count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	i = byte_set_size(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	if (i < BYTE_SET_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		ret = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	i = byte_core_set_size(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 	if (i <= BYTE_CORE_SET_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 		ret = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	if (i >= BYTE_CORE_SET_HIGH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 	i = shannon_entropy(ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	if (i <= ENTROPY_LVL_ACEPTABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 		ret = 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	 * For the levels below ENTROPY_LVL_HIGH, additional analysis would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	 * needed to give green light to compression.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	 * For now just assume that compression at that level is not worth the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	 * resources because:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	 * 1. it is possible to defrag the data later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	 * 2. the data would turn out to be hardly compressible, e.g. 150 byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 	 * values with every bucket count at around 54. The heuristic would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 	 * be confused. This can happen when the data has some internal repeated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	 * patterns like "abbacbbc...". This could be detected by analyzing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	 * pairs of bytes, which is too costly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	if (i < ENTROPY_LVL_HIGH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 		ret = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	put_workspace(0, ws_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)  * Convert the compression suffix (e.g. after "zlib", starting with ":") to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)  * level; an unrecognized string will select the default level.
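 *
 * For example, assuming the caller passes the text that follows the type
 * name (as the mount option parser is expected to do): str == ":3" yields
 * level 3, while an empty or unparseable suffix falls back to the default
 * level for the given type via btrfs_compress_set_level().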
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	unsigned int level = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	if (!type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	if (str[0] == ':') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 		ret = kstrtouint(str + 1, 10, &level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 			level = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	level = btrfs_compress_set_level(type, level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	return level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }