Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/*
 * Code for working with individual keys, and sorted sets of keys within a
 * btree node
 *
 * Copyright 2012 Google, Inc.
 */

#define pr_fmt(fmt) "bcache: %s() " fmt, __func__

#include "util.h"
#include "bset.h"

#include <linux/console.h>
#include <linux/sched/clock.h>
#include <linux/random.h>
#include <linux/prefetch.h>

#ifdef CONFIG_BCACHE_DEBUG

void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
{
	struct bkey *k, *next;

	for (k = i->start; k < bset_bkey_last(i); k = next) {
		next = bkey_next(k);

		pr_err("block %u key %u/%u: ", set,
		       (unsigned int) ((u64 *) k - i->d), i->keys);

		if (b->ops->key_dump)
			b->ops->key_dump(b, k);
		else
			pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));

		if (next < bset_bkey_last(i) &&
		    bkey_cmp(k, b->ops->is_extents ?
			     &START_KEY(next) : next) > 0)
			pr_err("Key skipped backwards\n");
	}
}

void bch_dump_bucket(struct btree_keys *b)
{
	unsigned int i;

	console_lock();
	for (i = 0; i <= b->nsets; i++)
		bch_dump_bset(b, b->set[i].data,
			      bset_sector_offset(b, b->set[i].data));
	console_unlock();
}

int __bch_count_data(struct btree_keys *b)
{
	unsigned int ret = 0;
	struct btree_iter iter;
	struct bkey *k;

	if (b->ops->is_extents)
		for_each_key(b, k, &iter)
			ret += KEY_SIZE(k);
	return ret;
}

void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
	va_list args;
	struct bkey *k, *p = NULL;
	struct btree_iter iter;
	const char *err;

	for_each_key(b, k, &iter) {
		if (b->ops->is_extents) {
			err = "Keys out of order";
			if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
				goto bug;

			if (bch_ptr_invalid(b, k))
				continue;

			err = "Overlapping keys";
			if (p && bkey_cmp(p, &START_KEY(k)) > 0)
				goto bug;
		} else {
			if (bch_ptr_bad(b, k))
				continue;

			err = "Duplicate keys";
			if (p && !bkey_cmp(p, k))
				goto bug;
		}
		p = k;
	}
#if 0
	err = "Key larger than btree node key";
	if (p && bkey_cmp(p, &b->key) > 0)
		goto bug;
#endif
	return;
bug:
	bch_dump_bucket(b);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	panic("bch_check_keys error: %s:\n", err);
}

static void bch_btree_iter_next_check(struct btree_iter *iter)
{
	struct bkey *k = iter->data->k, *next = bkey_next(k);

	if (next < iter->data->end &&
	    bkey_cmp(k, iter->b->ops->is_extents ?
		     &START_KEY(next) : next) > 0) {
		bch_dump_bucket(iter->b);
		panic("Key skipped backwards\n");
	}
}

#else

static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}

#endif

/* Keylists */

int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
{
	size_t oldsize = bch_keylist_nkeys(l);
	size_t newsize = oldsize + u64s;
	uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
	uint64_t *new_keys;

	newsize = roundup_pow_of_two(newsize);

	if (newsize <= KEYLIST_INLINE ||
	    roundup_pow_of_two(oldsize) == newsize)
		return 0;

	new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO);

	if (!new_keys)
		return -ENOMEM;

	/*
	 * If we were still using the inline array, krealloc(NULL, ...) above
	 * acted as a plain allocation, so copy the inline keys over by hand.
	 */
	if (!old_keys)
		memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize);

	l->keys_p = new_keys;
	l->top_p = new_keys + oldsize;

	return 0;
}

/* Pop the top key of keylist by pointing l->top to its previous key */
struct bkey *bch_keylist_pop(struct keylist *l)
{
	struct bkey *k = l->keys;

	if (k == l->top)
		return NULL;

	while (bkey_next(k) != l->top)
		k = bkey_next(k);

	return l->top = k;
}

/* Pop the bottom key of keylist and update l->top_p */
void bch_keylist_pop_front(struct keylist *l)
{
	l->top_p -= bkey_u64s(l->keys);

	memmove(l->keys,
		bkey_next(l->keys),
		bch_keylist_bytes(l));
}

/* Key/pointer manipulation */

void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
			      unsigned int i)
{
	BUG_ON(i > KEY_PTRS(src));

	/* Only copy the header, key, and one pointer. */
	memcpy(dest, src, 2 * sizeof(uint64_t));
	dest->ptr[0] = src->ptr[i];
	SET_KEY_PTRS(dest, 1);
	/* We didn't copy the checksum so clear that bit. */
	SET_KEY_CSUM(dest, 0);
}

bool __bch_cut_front(const struct bkey *where, struct bkey *k)
{
	unsigned int i, len = 0;

	if (bkey_cmp(where, &START_KEY(k)) <= 0)
		return false;

	if (bkey_cmp(where, k) < 0)
		len = KEY_OFFSET(k) - KEY_OFFSET(where);
	else
		bkey_copy_key(k, where);

	for (i = 0; i < KEY_PTRS(k); i++)
		SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + KEY_SIZE(k) - len);

	BUG_ON(len > KEY_SIZE(k));
	SET_KEY_SIZE(k, len);
	return true;
}

bool __bch_cut_back(const struct bkey *where, struct bkey *k)
{
	unsigned int len = 0;

	if (bkey_cmp(where, k) >= 0)
		return false;

	BUG_ON(KEY_INODE(where) != KEY_INODE(k));

	if (bkey_cmp(where, &START_KEY(k)) > 0)
		len = KEY_OFFSET(where) - KEY_START(k);

	bkey_copy_key(k, where);

	BUG_ON(len > KEY_SIZE(k));
	SET_KEY_SIZE(k, len);
	return true;
}
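
/*
 * Worked example (illustrative sketch, not part of the driver): extent keys
 * are addressed by their *end* offset, so KEY_START(k) == KEY_OFFSET(k) -
 * KEY_SIZE(k). The values below follow directly from the two functions above.
 */
#if 0
static void bch_cut_example(void)
{
	struct bkey k = KEY(1, 100, 20);	/* extent covering sectors [80, 100) */

	/* len = 100 - 90 = 10; ptr offsets advance by 20 - 10 = 10 */
	__bch_cut_front(&KEY(1, 90, 0), &k);	/* k now covers [90, 100) */

	/*
	 * Applied to the original key instead, __bch_cut_back(&KEY(1, 90, 0), &k)
	 * would give len = 90 - 80 = 10 and k == KEY(1, 90, 10), i.e. the
	 * front half [80, 90).
	 */
}
#endif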

/* Auxiliary search trees */

/* 32 bits total: */
#define BKEY_MID_BITS		3
#define BKEY_EXPONENT_BITS	7
#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)

struct bkey_float {
	unsigned int	exponent:BKEY_EXPONENT_BITS;
	unsigned int	m:BKEY_MID_BITS;
	unsigned int	mantissa:BKEY_MANTISSA_BITS;
} __packed;
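
/*
 * Sanity sketch (illustrative, not compiled in): the three bitfields above
 * must fill exactly 32 bits (3 + 7 + 22), so struct bkey_float stays 4 bytes
 * and BSET_CACHELINE / 4 tree nodes fit per cacheline of keys.
 */
#if 0
static_assert(BKEY_MID_BITS + BKEY_EXPONENT_BITS + BKEY_MANTISSA_BITS == 32);
static_assert(sizeof(struct bkey_float) == sizeof(uint32_t));
#endif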

/*
 * BSET_CACHELINE was originally intended to match the hardware cacheline
 * size - it used to be 64, but I realized the lookup code would touch
 * slightly less memory if it was 128.
 *
 * It defines the number of bytes (in struct bset) per struct bkey_float in
 * the auxiliary search tree - when we're done searching the bset_float tree
 * we have this many bytes left that we do a linear search over.
 *
 * Since (after level 5) every level of the bset_tree is on a new cacheline,
 * we're touching one fewer cacheline in the bset tree in exchange for one more
 * cacheline in the linear search - but the linear search might stop before it
 * gets to the second cacheline.
 */

#define BSET_CACHELINE		128

/* Space required for the btree node keys */
static inline size_t btree_keys_bytes(struct btree_keys *b)
{
	return PAGE_SIZE << b->page_order;
}

static inline size_t btree_keys_cachelines(struct btree_keys *b)
{
	return btree_keys_bytes(b) / BSET_CACHELINE;
}

/* Space required for the auxiliary search trees */
static inline size_t bset_tree_bytes(struct btree_keys *b)
{
	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
}

/* Space required for the prev pointers */
static inline size_t bset_prev_bytes(struct btree_keys *b)
{
	return btree_keys_cachelines(b) * sizeof(uint8_t);
}
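
/*
 * Worked example (illustrative only; assumes 4K pages and page_order == 2,
 * values follow from the helpers above):
 */
#if 0
static void bset_size_example(struct btree_keys *b)
{
	b->page_order = 2;
	BUG_ON(btree_keys_bytes(b)      != 16384);	/* PAGE_SIZE << 2 */
	BUG_ON(btree_keys_cachelines(b) != 128);	/* 16384 / BSET_CACHELINE */
	BUG_ON(bset_tree_bytes(b)       != 512);	/* 128 * sizeof(struct bkey_float) */
	BUG_ON(bset_prev_bytes(b)       != 128);	/* 128 * sizeof(uint8_t) */
}
#endif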

/* Memory allocation */

void bch_btree_keys_free(struct btree_keys *b)
{
	struct bset_tree *t = b->set;

	if (bset_prev_bytes(b) < PAGE_SIZE)
		kfree(t->prev);
	else
		free_pages((unsigned long) t->prev,
			   get_order(bset_prev_bytes(b)));

	if (bset_tree_bytes(b) < PAGE_SIZE)
		kfree(t->tree);
	else
		free_pages((unsigned long) t->tree,
			   get_order(bset_tree_bytes(b)));

	free_pages((unsigned long) t->data, b->page_order);

	t->prev = NULL;
	t->tree = NULL;
	t->data = NULL;
}

int bch_btree_keys_alloc(struct btree_keys *b,
			 unsigned int page_order,
			 gfp_t gfp)
{
	struct bset_tree *t = b->set;

	BUG_ON(t->data);

	b->page_order = page_order;

	t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order);
	if (!t->data)
		goto err;

	t->tree = bset_tree_bytes(b) < PAGE_SIZE
		? kmalloc(bset_tree_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
	if (!t->tree)
		goto err;

	t->prev = bset_prev_bytes(b) < PAGE_SIZE
		? kmalloc(bset_prev_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
	if (!t->prev)
		goto err;

	return 0;
err:
	bch_btree_keys_free(b);
	return -ENOMEM;
}

void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
			 bool *expensive_debug_checks)
{
	b->ops = ops;
	b->expensive_debug_checks = expensive_debug_checks;
	b->nsets = 0;
	b->last_set_unwritten = 0;

	/*
	 * struct btree_keys is embedded in struct btree, and struct
	 * bset_tree is embedded into struct btree_keys. They are all
	 * initialized as 0 by kzalloc() in mca_bucket_alloc(), and
	 * b->set[0].data is allocated in bch_btree_keys_alloc(), so we
	 * don't have to initialize b->set[].size and b->set[].data here
	 * any more.
	 */
}

/* Binary tree stuff for auxiliary search trees */

/*
 * Return the array index that follows j in an in-order traversal of a
 * binary tree stored in a linear array.
 */
static unsigned int inorder_next(unsigned int j, unsigned int size)
{
	if (j * 2 + 1 < size) {
		j = j * 2 + 1;

		while (j * 2 < size)
			j *= 2;
	} else
		j >>= ffz(j) + 1;

	return j;
}

/*
 * Return the array index that precedes j in an in-order traversal of a
 * binary tree stored in a linear array.
 */
static unsigned int inorder_prev(unsigned int j, unsigned int size)
{
	if (j * 2 < size) {
		j = j * 2;

		while (j * 2 + 1 < size)
			j = j * 2 + 1;
	} else
		j >>= ffs(j);

	return j;
}
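
/*
 * Worked example (illustrative): for size == 8 the tree occupies array
 * indices 1..7 (1 is the root, the children of j are 2j and 2j+1), and
 * repeated inorder_next() starting from the leftmost node visits
 *
 *	4 2 5 1 6 3 7
 *
 * i.e. the in-order traversal of
 *
 *	      1
 *	   2     3
 *	  4 5   6 7
 *
 * inorder_prev() walks the same sequence backwards; both return 0 when
 * they run off the end.
 */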

/*
 * I have no idea why this code works... and I'm the one who wrote it
 *
 * However, I do know what it does:
 * Given a binary tree constructed in an array (i.e. how you normally implement
 * a heap), it converts a node in the tree - referenced by array index - to the
 * index it would have if you did an inorder traversal.
 *
 * Also tested for every j and size up to somewhere around 6 million.
 *
 * The binary tree starts at array index 1, not 0
 * extra is a function of size:
 *   extra = (size - rounddown_pow_of_two(size - 1)) << 1;
 */
static unsigned int __to_inorder(unsigned int j,
				 unsigned int size,
				 unsigned int extra)
{
	unsigned int b = fls(j);
	unsigned int shift = fls(size - 1) - b;

	j  ^= 1U << (b - 1);
	j <<= 1;
	j  |= 1;
	j <<= shift;

	if (j > extra)
		j -= (j - extra) >> 1;

	return j;
}
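
/*
 * Worked example (illustrative): with size == 8, extra == (8 - 4) << 1 == 8,
 * and the tree indices from the diagram above:
 *
 *	__to_inorder(4, 8, 8) == 1	(leftmost node is visited first)
 *	__to_inorder(1, 8, 8) == 4	(the root sits in the middle)
 *	__to_inorder(7, 8, 8) == 7	(rightmost node is visited last)
 *
 * matching the in-order sequence 4 2 5 1 6 3 7.
 */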

/*
 * Return the cacheline index in bset_tree->data, where j is the index
 * from the linear array which stores the auxiliary binary tree
 */
static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
{
	return __to_inorder(j, t->size, t->extra);
}

static unsigned int __inorder_to_tree(unsigned int j,
				      unsigned int size,
				      unsigned int extra)
{
	unsigned int shift;

	if (j > extra)
		j += j - extra;

	shift = ffs(j);

	j >>= shift;
	j  |= roundup_pow_of_two(size) >> shift;

	return j;
}

/*
 * Return an index into the linear array which stores the auxiliary binary
 * tree, where j is the cacheline index of t->data.
 */
static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
{
	return __inorder_to_tree(j, t->size, t->extra);
}

#if 0
void inorder_test(void)
{
	unsigned long done = 0;
	ktime_t start = ktime_get();

	for (unsigned int size = 2;
	     size < 65536000;
	     size++) {
		unsigned int extra =
			(size - rounddown_pow_of_two(size - 1)) << 1;
		unsigned int i = 1, j = rounddown_pow_of_two(size - 1);

		if (!(size % 4096))
			pr_notice("loop %u, %llu per us\n", size,
			       done / ktime_us_delta(ktime_get(), start));

		while (1) {
			if (__inorder_to_tree(i, size, extra) != j)
				panic("size %10u j %10u i %10u", size, j, i);

			if (__to_inorder(j, size, extra) != i)
				panic("size %10u j %10u i %10u", size, j, i);

			if (j == rounddown_pow_of_two(size) - 1)
				break;

			BUG_ON(inorder_prev(inorder_next(j, size), size) != j);

			j = inorder_next(j, size);
			i++;
		}

		done += size - 1;
	}
}
#endif

/*
 * Cacheline/offset <-> bkey pointer arithmetic:
 *
 * t->tree is a binary search tree in an array; each node corresponds to a key
 * in one cacheline in t->set (BSET_CACHELINE bytes).
 *
 * This means we don't have to store the full index of the key that a node in
 * the binary tree points to; to_inorder() gives us the cacheline, and then
 * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
 *
 * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
 * make this work.
 *
 * To construct the bfloat for an arbitrary key we need to know what the key
 * immediately preceding it is: we have to check if the two keys differ in the
 * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
 * of the previous key so we can walk backwards to it from t->tree[j]'s key.
 */

static struct bkey *cacheline_to_bkey(struct bset_tree *t,
				      unsigned int cacheline,
				      unsigned int offset)
{
	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
}

static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
{
	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}

static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
					 unsigned int cacheline,
					 struct bkey *k)
{
	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
}
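
/*
 * Illustrative sketch (not built): the helpers above invert each other for
 * any key k that lives at 8-byte word w of cacheline c in t->data.
 */
#if 0
static void cacheline_roundtrip_example(struct bset_tree *t, struct bkey *k)
{
	unsigned int c = bkey_to_cacheline(t, k);
	unsigned int w = bkey_to_cacheline_offset(t, c, k);

	BUG_ON(cacheline_to_bkey(t, c, w) != k);
}
#endif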

static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
{
	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
}

static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
{
	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
}

/*
 * For the write set - the one we're currently inserting keys into - we don't
 * maintain a full search tree, we just keep a simple lookup table in t->prev.
 */
static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
{
	return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
}

static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
{
	low >>= shift;
	low  |= (high << 1) << (63U - shift);
	return low;
}
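
/*
 * Worked example (illustrative): shrd128() is a 128-bit funnel shift right,
 * returning bits [shift, shift + 63] of the value (high:low). E.g. for the
 * 128-bit value 0x1 << 64:
 *
 *	shrd128(0x1, 0x0, 60) == 0x10
 *
 * Splitting the second term as (high << 1) << (63 - shift) avoids an
 * undefined 64-bit shift when shift == 0.
 */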

/*
 * Calculate the mantissa value for struct bkey_float.
 * If the most significant bit of f->exponent is not set, then
 *  - f->exponent >> 6 is 0
 *  - p[0] points to bkey->low
 *  - p[-1] borrows bits from KEY_INODE() of bkey->high
 * If the most significant bit of f->exponent is set, then
 *  - f->exponent >> 6 is 1
 *  - p[0] points to bits from KEY_INODE() of bkey->high
 *  - p[-1] points to other bits from KEY_INODE() of
 *    bkey->high too.
 * See make_bfloat() to check when the most significant bit of f->exponent
 * is set or not.
 */
static inline unsigned int bfloat_mantissa(const struct bkey *k,
					   struct bkey_float *f)
{
	const uint64_t *p = &k->low - (f->exponent >> 6);

	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
}

static void make_bfloat(struct bset_tree *t, unsigned int j)
{
	struct bkey_float *f = &t->tree[j];
	struct bkey *m = tree_to_bkey(t, j);
	struct bkey *p = tree_to_prev_bkey(t, j);

	struct bkey *l = is_power_of_2(j)
		? t->data->start
		: tree_to_prev_bkey(t, j >> ffs(j));

	struct bkey *r = is_power_of_2(j + 1)
		? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end))
		: tree_to_bkey(t, j >> (ffz(j) + 1));

	BUG_ON(m < l || m > r);
	BUG_ON(bkey_next(p) != m);

	/*
	 * If l and r have different KEY_INODE values (different backing
	 * devices), f->exponent records how many least significant bits
	 * are different in the KEY_INODE values and sets the most
	 * significant bits to 1 (by +64).
	 * If l and r have the same KEY_INODE value, f->exponent records
	 * how many bits differ in the least significant bits of bkey->low.
	 * See bfloat_mantissa() for how the most significant bit of
	 * f->exponent is used to calculate the bfloat mantissa value.
	 */
	if (KEY_INODE(l) != KEY_INODE(r))
		f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64;
	else
		f->exponent = fls64(r->low ^ l->low);

	f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0);

	/*
	 * Setting f->exponent = 127 flags this node as failed, and causes the
	 * lookup code to fall back to comparing against the original key.
	 */

	if (bfloat_mantissa(m, f) != bfloat_mantissa(p, f))
		f->mantissa = bfloat_mantissa(m, f) - 1;
	else
		f->exponent = 127;
}
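
/*
 * Worked examples (illustrative): if l and r share KEY_INODE() and
 * l->low == 0x100, r->low == 0x1f0, then fls64(0x1f0 ^ 0x100) == 8 and
 * f->exponent = max(8 - 22, 0) = 0, so the mantissa is just the low 22
 * bits of m->low. If instead the inodes differ in bit 29,
 * fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64 == 94 and f->exponent becomes
 * 94 - 22 = 72: now f->exponent >> 6 == 1, so bfloat_mantissa() reads the
 * mantissa from bkey->high (the KEY_INODE() bits) instead.
 */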

static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
{
	if (t != b->set) {
		unsigned int j = roundup(t[-1].size,
					 64 / sizeof(struct bkey_float));

		t->tree = t[-1].tree + j;
		t->prev = t[-1].prev + j;
	}

	while (t < b->set + MAX_BSETS)
		t++->size = 0;
}

static void bch_bset_build_unwritten_tree(struct btree_keys *b)
{
	struct bset_tree *t = bset_tree_last(b);

	BUG_ON(b->last_set_unwritten);
	b->last_set_unwritten = 1;

	bset_alloc_tree(b, t);

	if (t->tree != b->set->tree + btree_keys_cachelines(b)) {
		t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
		t->size = 1;
	}
}

void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
{
	if (i != b->set->data) {
		b->set[++b->nsets].data = i;
		i->seq = b->set->data->seq;
	} else
		get_random_bytes(&i->seq, sizeof(uint64_t));

	i->magic	= magic;
	i->version	= 0;
	i->keys		= 0;

	bch_bset_build_unwritten_tree(b);
}

/*
 * Build the auxiliary binary tree 'struct bset_tree *t'; this tree is used to
 * accelerate bkey search in a btree node (pointed to by bset_tree->data in
 * memory). After searching the auxiliary tree by calling bset_search_tree(),
 * a struct bset_search_iter is returned which indicates the range [l, r] of
 * bset_tree->data where the bkey being searched for might be. A subsequent
 * linear comparison then does the exact search; see __bch_bset_search() for
 * how the auxiliary tree is used.
 */
void bch_bset_build_written_tree(struct btree_keys *b)
{
	struct bset_tree *t = bset_tree_last(b);
	struct bkey *prev = NULL, *k = t->data->start;
	unsigned int j, cacheline = 1;

	b->last_set_unwritten = 0;

	bset_alloc_tree(b, t);

	t->size = min_t(unsigned int,
			bkey_to_cacheline(t, bset_bkey_last(t->data)),
			b->set->tree + btree_keys_cachelines(b) - t->tree);

	if (t->size < 2) {
		t->size = 0;
		return;
	}

	t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;

	/* First we figure out where the first key in each cacheline is */
	for (j = inorder_next(0, t->size);
	     j;
	     j = inorder_next(j, t->size)) {
		while (bkey_to_cacheline(t, k) < cacheline)
			prev = k, k = bkey_next(k);

		t->prev[j] = bkey_u64s(prev);
		t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
	}

	while (bkey_next(k) != bset_bkey_last(t->data))
		k = bkey_next(k);

	t->end = *k;

	/* Then we build the tree */
	for (j = inorder_next(0, t->size);
	     j;
	     j = inorder_next(j, t->size))
		make_bfloat(t, j);
}

/* Insert */

void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
{
	struct bset_tree *t;
	unsigned int inorder, j = 1;

	for (t = b->set; t <= bset_tree_last(b); t++)
		if (k < bset_bkey_last(t->data))
			goto found_set;

	BUG();
found_set:
	if (!t->size || !bset_written(b, t))
		return;

	inorder = bkey_to_cacheline(t, k);

	if (k == t->data->start)
		goto fix_left;

	if (bkey_next(k) == bset_bkey_last(t->data)) {
		t->end = *k;
		goto fix_right;
	}

	j = inorder_to_tree(inorder, t);

	if (j &&
	    j < t->size &&
	    k == tree_to_bkey(t, j))
fix_left:	do {
			make_bfloat(t, j);
			j = j * 2;
		} while (j < t->size);

	j = inorder_to_tree(inorder + 1, t);

	if (j &&
	    j < t->size &&
	    k == tree_to_prev_bkey(t, j))
fix_right:	do {
			make_bfloat(t, j);
			j = j * 2 + 1;
		} while (j < t->size);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) static void bch_bset_fix_lookup_table(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 				      struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 				      struct bkey *k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	unsigned int shift = bkey_u64s(k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	unsigned int j = bkey_to_cacheline(t, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	/* We're getting called from btree_split() or btree_gc, just bail out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	if (!t->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	 * k is the key we just inserted; we need to find the entry in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	 * lookup table for the first key that is strictly greater than k:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	 * it's either k's cacheline or the next one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	while (j < t->size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	       table_to_bkey(t, j) <= k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 		j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	 * Adjust all the lookup table entries, and find a new key for any that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	 * have gotten too big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	for (; j < t->size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 		t->prev[j] += shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 
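		/*
		 * t->prev[j] is the offset in u64s of cacheline j's first key
		 * from the cacheline boundary, and the shift may have pushed
		 * that key further into (or clean out of) its cacheline.  If
		 * the offset has drifted too far, rescan from the previous
		 * table entry to find the real first key in cacheline j and
		 * re-anchor the entry on it.
		 */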
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		if (t->prev[j] > 7) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 			k = table_to_bkey(t, j - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 			while (k < cacheline_to_bkey(t, j, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 				k = bkey_next(k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 			t->prev[j] = bkey_to_cacheline_offset(t, j, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	/* Possibly add a new entry to the end of the lookup table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	for (k = table_to_bkey(t, t->size - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	     k != bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	     k = bkey_next(k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 		if (t->size == bkey_to_cacheline(t, k)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 			t->prev[t->size] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 				bkey_to_cacheline_offset(t, t->size, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 			t->size++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834)  * Tries to merge l and r: l should be lower than r.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835)  * Returns true if we were able to merge. If we did merge, l will be the merged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836)  * key and r will be untouched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837)  */
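/*
 * A worked example for extents, with hypothetical values: bcache extent
 * keys point at the *end* of the extent, so l = KEY(1, 100, 50) covers
 * offsets 50..100 and r = KEY(1, 150, 50) covers 100..150.  Then
 * bkey_cmp(l, &START_KEY(r)) == 0, the checks below pass, and a
 * successful key_merge() turns l into KEY(1, 150, 100) covering 50..150
 * while r is left untouched.
 */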
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	if (!b->ops->key_merge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	 * Generic header checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	 * Assumes left and right are in order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 	 * Left and right must be exactly aligned: l must end where r starts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	if (!bch_bkey_equal_header(l, r) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	     bkey_cmp(l, &START_KEY(r)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	return b->ops->key_merge(b, l, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) void bch_bset_insert(struct btree_keys *b, struct bkey *where,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 		     struct bkey *insert)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	struct bset_tree *t = bset_tree_last(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	BUG_ON(!b->last_set_unwritten);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	BUG_ON(bset_byte_offset(b, t->data) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	       PAGE_SIZE << b->page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 
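	/*
	 * Make room: shift every key from 'where' to the end of the set up
	 * by the size of the new key, then copy the key into the gap and
	 * patch up the lookup table.
	 */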
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	memmove((uint64_t *) where + bkey_u64s(insert),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		where,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		(void *) bset_bkey_last(t->data) - (void *) where);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	t->data->keys += bkey_u64s(insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	bkey_copy(where, insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	bch_bset_fix_lookup_table(b, t, where);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
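/*
 * Insert k into the last (unwritten) bset.  We seed an iterator at k's
 * preceding key, give ops->insert_fixup() a chance to resolve overlaps
 * or a replace_key (in which case nothing more is inserted), then try
 * to merge k with its neighbours before falling back to a real insert.
 * The returned status tells the caller which of those paths was taken.
 */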
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 			      struct bkey *replace_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	struct bset *i = bset_tree_last(b)->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	struct bkey *m, *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	struct bkey preceding_key_on_stack = ZERO_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	struct bkey *preceding_key_p = &preceding_key_on_stack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	 * If k has a preceding key, preceding_key_p will be set to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	 * address of k's preceding key; otherwise preceding_key_p will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	 * set to NULL inside preceding_key().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	if (b->ops->is_extents)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		preceding_key(&START_KEY(k), &preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		preceding_key(k, &preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	m = bch_btree_iter_init(b, &iter, preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	if (b->ops->insert_fixup(b, k, &iter, replace_key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	status = BTREE_INSERT_STATUS_INSERT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	while (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	       bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		prev = m, m = bkey_next(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	/* prev is in the tree; if we merge we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	status = BTREE_INSERT_STATUS_BACK_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	if (prev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	    bch_bkey_try_merge(b, prev, k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		goto merged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	status = BTREE_INSERT_STATUS_OVERWROTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	if (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		goto copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	status = BTREE_INSERT_STATUS_FRONT_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	if (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	    bch_bkey_try_merge(b, k, m))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		goto copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	bch_bset_insert(b, m, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) copy:	bkey_copy(m, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) merged:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) /* Lookup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) struct bset_search_iter {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	struct bkey *l, *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) };
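/*
 * Result of a cacheline search: the key we're after lies in [l, r);
 * __bch_bset_search() finishes the job with a linear walk from l.
 */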
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 						     const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	unsigned int li = 0, ri = t->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 
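	/*
	 * Binary search over the unwritten set's lookup table: ri only ever
	 * moves onto entries greater than the search key (ri == t->size is
	 * the end-of-set sentinel) and li onto entries at or below it, so
	 * they converge on the pair of adjacent cachelines bracketing the
	 * search key.
	 */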
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	while (li + 1 != ri) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 		unsigned int m = (li + ri) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 		if (bkey_cmp(table_to_bkey(t, m), search) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 			ri = m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 			li = m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	return (struct bset_search_iter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		table_to_bkey(t, li),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 		ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) static struct bset_search_iter bset_search_tree(struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 						const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	struct bkey *l, *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	struct bkey_float *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	unsigned int inorder, j, n = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		unsigned int p = n << 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 
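		/*
		 * The tree array is laid out in breadth-first order, so
		 * n << 4 is node n's leftmost descendant four levels down;
		 * all 16 of its level-4 descendants are adjacent there, and
		 * one prefetch covers them before the next four compares get
		 * around to needing them.
		 */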
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 		if (p < t->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 			prefetch(&t->tree[p]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		j = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 		f = &t->tree[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 
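		/*
		 * exponent == 127 marks a failed bkey_float, one whose
		 * distinguishing bits wouldn't fit in the mantissa, so fall
		 * back to comparing the full keys.
		 */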
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		if (likely(f->exponent != 127)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 			if (f->mantissa >= bfloat_mantissa(search, f))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 				n = j * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 				n = j * 2 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 			if (bkey_cmp(tree_to_bkey(t, j), search) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 				n = j * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 				n = j * 2 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	} while (n < t->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	inorder = to_inorder(j, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	 * n would have been the node we recursed to - the low bit tells us if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	 * we recursed left or recursed right.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	if (n & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 		l = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 		if (++inorder != t->size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			f = &t->tree[inorder_next(j, t->size)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			r = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			r = bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		r = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 		if (--inorder) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			f = &t->tree[inorder_prev(j, t->size)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 			l = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 			l = t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	return (struct bset_search_iter) {l, r};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 			       const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	struct bset_search_iter i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	 * First we search for the right cacheline, then we do a linear search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	 * within that cacheline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	 * To search for the cacheline, there are three different possibilities:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	 *  * The set is too small to have a search tree, so we just do a linear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	 *    search over the whole set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	 *  * The set is the one we're currently inserting into; keeping a full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	 *    auxiliary search tree up to date would be too expensive, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	 *    use a much simpler lookup table to do a binary search -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	 *    bset_search_write_set().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	 *  * Or we use the auxiliary search tree we constructed earlier -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	 *    bset_search_tree().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	if (unlikely(!t->size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		i.l = t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		i.r = bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	} else if (bset_written(b, t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		 * Each node in the auxiliary search tree covers a certain range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		 * of bits, and keys above and below the set it covers might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		 * differ outside those bits - so we have to special-case the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		 * start and the end of the set here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		if (unlikely(bkey_cmp(search, &t->end) >= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 			return bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		if (unlikely(bkey_cmp(search, t->data->start) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 			return t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		i = bset_search_tree(t, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 		BUG_ON(!b->nsets &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		       t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		i = bset_search_write_set(t, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	if (btree_keys_expensive_checks(b)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		BUG_ON(bset_written(b, t) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		       i.l != t->data->start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		       bkey_cmp(tree_to_prev_bkey(t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 			  inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 				search) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		BUG_ON(i.r != bset_bkey_last(t->data) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 		       bkey_cmp(i.r, search) <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
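	/*
	 * The cacheline search only bracketed the answer in [i.l, i.r);
	 * finish with a linear walk to the first key strictly greater than
	 * the search key.
	 */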
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	while (likely(i.l != i.r) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	       bkey_cmp(i.l, search) <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 		i.l = bkey_next(i.l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	return i.l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) /* Btree iterator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 				 struct btree_iter_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) static inline bool btree_iter_cmp(struct btree_iter_set l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 				  struct btree_iter_set r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	return bkey_cmp(l.k, r.k) > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) static inline bool btree_iter_end(struct btree_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	return !iter->used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 			 struct bkey *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (k != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		BUG_ON(!heap_add(iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 				 ((struct btree_iter_set) { k, end }),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 				 btree_iter_cmp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 					  struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 					  struct bkey *search,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 					  struct bset_tree *start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	struct bkey *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	iter->size = ARRAY_SIZE(iter->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	iter->used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) #ifdef CONFIG_BCACHE_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	iter->b = b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	for (; start <= bset_tree_last(b); start++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 		ret = bch_bset_search(b, start, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) struct bkey *bch_btree_iter_init(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 				 struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 				 struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	return __bch_btree_iter_init(b, iter, search, b->set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
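/*
 * The iterator is a small binary heap with one entry per bset;
 * btree_iter_cmp() keeps the set whose current key is smallest on top,
 * so repeatedly popping it yields the union of all the sets in sorted
 * order - a k-way merge.
 */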
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 						 btree_iter_cmp_fn *cmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	struct btree_iter_set b __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	struct bkey *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	if (!btree_iter_end(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		bch_btree_iter_next_check(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 		ret = iter->data->k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		iter->data->k = bkey_next(iter->data->k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 		if (iter->data->k > iter->data->end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 			WARN_ONCE(1, "bset was corrupt!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 			iter->data->k = iter->data->end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		if (iter->data->k == iter->data->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 			heap_pop(iter, b, cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 			heap_sift(iter, 0, cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct bkey *bch_btree_iter_next(struct btree_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	return __bch_btree_iter_next(iter, btree_iter_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 					struct btree_keys *b, ptr_filter_fn fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	struct bkey *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 		ret = bch_btree_iter_next(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	} while (ret && fn(b, ret));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) /* Mergesort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) void bch_bset_sort_state_free(struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	mempool_exit(&state->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) int bch_bset_sort_state_init(struct bset_sort_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 			     unsigned int page_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	spin_lock_init(&state->time.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	state->page_order = page_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	state->crit_factor = int_sqrt(1 << page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	return mempool_init_page_pool(&state->pool, 1, page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) static void btree_mergesort(struct btree_keys *b, struct bset *out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 			    struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 			    bool fixup, bool remove_stale)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	struct bkey *k, *last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	BKEY_PADDED(k) tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		? bch_ptr_bad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		: bch_ptr_invalid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	/* Heapify the iterator, using our comparison function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	for (i = iter->used / 2 - 1; i >= 0; --i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 		heap_sift(iter, i, b->ops->sort_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 
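	/*
	 * Pop keys off the heap in sorted order, dropping whatever bad()
	 * rejects and coalescing mergeable neighbours as they're copied
	 * into 'out'; ops->sort_fixup(), when used, resolves overlapping
	 * extents before they're popped.
	 */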
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	while (!btree_iter_end(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		if (b->ops->sort_fixup && fixup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 			k = b->ops->sort_fixup(iter, &tmp.k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 			k = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 		if (!k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 			k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		if (bad(b, k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		if (!last) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 			last = out->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 			bkey_copy(last, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		} else if (!bch_bkey_try_merge(b, last, k)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 			last = bkey_next(last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 			bkey_copy(last, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	pr_debug("sorted %i keys\n", out->keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 			 unsigned int start, unsigned int order, bool fixup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 			 struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	uint64_t start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	bool used_mempool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 						     order);
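	/*
	 * The opportunistic GFP_NOWAIT allocation above may fail; the
	 * mempool fallback is guaranteed to make progress, but only hands
	 * out buffers of the maximum size, hence 'order' is bumped to
	 * state->page_order to match.
	 */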
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	if (!out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		struct page *outp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 		BUG_ON(order > state->page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 		outp = mempool_alloc(&state->pool, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 		out = page_address(outp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 		used_mempool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 		order = state->page_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	start_time = local_clock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	btree_mergesort(b, out, iter, fixup, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	b->nsets = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	if (!start && order == b->page_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 		 * Our temporary buffer is the same size as the btree node's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		 * buffer, so we can just swap buffers instead of doing a big
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		 * memcpy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		 * Don't worry even if 'out' was allocated from the mempool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 		 * it can still be swapped here, because state->pool is a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		 * mempool created by mempool_init_page_pool(), which allocates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		 * its pages with alloc_pages() under the hood.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		out->magic	= b->set->data->magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		out->seq	= b->set->data->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		out->version	= b->set->data->version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		swap(out, b->set->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		b->set[start].data->keys = out->keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		memcpy(b->set[start].data->start, out->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		       (void *) bset_bkey_last(out) - (void *) out->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	if (used_mempool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		mempool_free(virt_to_page(out), &state->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		free_pages((unsigned long) out, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	bch_bset_build_written_tree(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	if (!start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		bch_time_stats_update(&state->time, start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 			    struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	size_t order = b->page_order, keys = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	int oldsize = bch_count_data(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 
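	/*
	 * When sorting only sets [start..nsets], size the temporary buffer
	 * by the number of keys actually being merged rather than for a
	 * whole btree node.
	 */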
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	if (start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 		for (i = start; i <= b->nsets; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 			keys += b->set[i].data->keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 		order = get_order(__set_bytes(b->set->data, keys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	__btree_sort(b, &iter, start, order, false, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) void bch_btree_sort_and_fix_extents(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 				    struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 				    struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	__btree_sort(b, iter, 0, b->page_order, true, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 			 struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	uint64_t start_time = local_clock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	bch_btree_iter_init(b, &iter, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	btree_mergesort(b, new->set->data, &iter, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	bch_time_stats_update(&state->time, start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	new->set->size = 0; /* XXX: why? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 
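/*
 * bset->keys counts u64s rather than bkeys, so SORT_CRIT is 4 KiB worth
 * of key data - below that, resorting a set is considered cheap.
 */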
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) #define SORT_CRIT	(4096 / sizeof(uint64_t))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	unsigned int crit = SORT_CRIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	/* Don't sort if nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	if (!b->nsets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 
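	/*
	 * Walk from the newest set towards the oldest, raising the
	 * threshold by crit_factor at each step: we resort from set i only
	 * if it's small relative to its age, which (roughly) keeps the
	 * sets' sizes geometrically distributed.
	 */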
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	for (i = b->nsets - 1; i >= 0; --i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 		crit *= state->crit_factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 		if (b->set[i].data->keys < crit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 			bch_btree_sort_partial(b, i, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	/* Sort if we'd overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	if (b->nsets + 1 == MAX_BSETS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 		bch_btree_sort(b, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	bch_bset_build_written_tree(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	for (i = 0; i <= b->nsets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 		struct bset_tree *t = &b->set[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 		size_t bytes = t->data->keys * sizeof(uint64_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		size_t j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 		if (bset_written(b, t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			stats->sets_written++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 			stats->bytes_written += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 			stats->floats += t->size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 
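			/*
			 * Node 0 of the tree array is unused, hence the
			 * size - 1 floats counted above; now count how many
			 * of them failed and fall back to full comparisons
			 * (exponent == 127).
			 */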
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 			for (j = 1; j < t->size; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 				if (t->tree[j].exponent == 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 					stats->failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 			stats->sets_unwritten++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 			stats->bytes_unwritten += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }