// SPDX-License-Identifier: GPL-2.0
/*
 * Code for working with individual keys, and sorted sets of keys within a
 * btree node
 *
 * Copyright 2012 Google, Inc.
 */

#define pr_fmt(fmt) "bcache: %s() " fmt, __func__

#include "util.h"
#include "bset.h"

#include <linux/console.h>
#include <linux/sched/clock.h>
#include <linux/random.h>
#include <linux/prefetch.h>

#ifdef CONFIG_BCACHE_DEBUG

void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
{
	struct bkey *k, *next;

	for (k = i->start; k < bset_bkey_last(i); k = next) {
		next = bkey_next(k);

		pr_err("block %u key %u/%u: ", set,
		       (unsigned int) ((u64 *) k - i->d), i->keys);

		if (b->ops->key_dump)
			b->ops->key_dump(b, k);
		else
			pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));

		if (next < bset_bkey_last(i) &&
		    bkey_cmp(k, b->ops->is_extents ?
			     &START_KEY(next) : next) > 0)
			pr_err("Key skipped backwards\n");
	}
}

void bch_dump_bucket(struct btree_keys *b)
{
	unsigned int i;

	console_lock();
	for (i = 0; i <= b->nsets; i++)
		bch_dump_bset(b, b->set[i].data,
			      bset_sector_offset(b, b->set[i].data));
	console_unlock();
}

int __bch_count_data(struct btree_keys *b)
{
	unsigned int ret = 0;
	struct btree_iter iter;
	struct bkey *k;

	if (b->ops->is_extents)
		for_each_key(b, k, &iter)
			ret += KEY_SIZE(k);
	return ret;
}

void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
{
	va_list args;
	struct bkey *k, *p = NULL;
	struct btree_iter iter;
	const char *err;

	for_each_key(b, k, &iter) {
		if (b->ops->is_extents) {
			err = "Keys out of order";
			if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
				goto bug;

			if (bch_ptr_invalid(b, k))
				continue;

			err = "Overlapping keys";
			if (p && bkey_cmp(p, &START_KEY(k)) > 0)
				goto bug;
		} else {
			if (bch_ptr_bad(b, k))
				continue;

			err = "Duplicate keys";
			if (p && !bkey_cmp(p, k))
				goto bug;
		}
		p = k;
	}
#if 0
	err = "Key larger than btree node key";
	if (p && bkey_cmp(p, &b->key) > 0)
		goto bug;
#endif
	return;
bug:
	bch_dump_bucket(b);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	panic("bch_check_keys error: %s:\n", err);
}

static void bch_btree_iter_next_check(struct btree_iter *iter)
{
	struct bkey *k = iter->data->k, *next = bkey_next(k);

	if (next < iter->data->end &&
	    bkey_cmp(k, iter->b->ops->is_extents ?
		     &START_KEY(next) : next) > 0) {
		bch_dump_bucket(iter->b);
		panic("Key skipped backwards\n");
	}
}

#else

static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}

#endif

/* Keylists */

int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
{
	size_t oldsize = bch_keylist_nkeys(l);
	size_t newsize = oldsize + u64s;
	uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
	uint64_t *new_keys;

	newsize = roundup_pow_of_two(newsize);

	if (newsize <= KEYLIST_INLINE ||
	    roundup_pow_of_two(oldsize) == newsize)
		return 0;

	new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO);

	if (!new_keys)
		return -ENOMEM;

	if (!old_keys)
		memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize);

	l->keys_p = new_keys;
	l->top_p = new_keys + oldsize;

	return 0;
}
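
/*
 * Worked example (illustrative numbers, not from any real caller): growing
 * a keylist that currently holds 20 u64s by 8 more gives newsize = 28,
 * rounded up to 32. Since roundup_pow_of_two(20) is also 32, the previous
 * realloc already sized the buffer to 32 u64s, so we return early without
 * calling krealloc().
 */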

/* Pop the top key of keylist by pointing l->top to its previous key */
struct bkey *bch_keylist_pop(struct keylist *l)
{
	struct bkey *k = l->keys;

	if (k == l->top)
		return NULL;

	while (bkey_next(k) != l->top)
		k = bkey_next(k);

	return l->top = k;
}

/* Pop the bottom key of keylist and update l->top_p */
void bch_keylist_pop_front(struct keylist *l)
{
	l->top_p -= bkey_u64s(l->keys);

	memmove(l->keys,
		bkey_next(l->keys),
		bch_keylist_bytes(l));
}

/* Key/pointer manipulation */

void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
			      unsigned int i)
{
	BUG_ON(i > KEY_PTRS(src));

	/* Only copy the header, key, and one pointer. */
	memcpy(dest, src, 2 * sizeof(uint64_t));
	dest->ptr[0] = src->ptr[i];
	SET_KEY_PTRS(dest, 1);
	/* We didn't copy the checksum so clear that bit. */
	SET_KEY_CSUM(dest, 0);
}

bool __bch_cut_front(const struct bkey *where, struct bkey *k)
{
	unsigned int i, len = 0;

	if (bkey_cmp(where, &START_KEY(k)) <= 0)
		return false;

	if (bkey_cmp(where, k) < 0)
		len = KEY_OFFSET(k) - KEY_OFFSET(where);
	else
		bkey_copy_key(k, where);

	for (i = 0; i < KEY_PTRS(k); i++)
		SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + KEY_SIZE(k) - len);

	BUG_ON(len > KEY_SIZE(k));
	SET_KEY_SIZE(k, len);
	return true;
}
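
#if 0
/*
 * Illustrative sketch only, kept under #if 0 like inorder_test() below.
 * An extent key of size 20 ending at offset 100 covers sectors [80, 100);
 * cutting its front at offset 90 should leave [90, 100) with size 10, and
 * any pointers would be advanced by KEY_SIZE - len = 10 sectors.
 */
static void cut_front_example(void)
{
	struct bkey k = KEY(0, 100, 20);
	struct bkey where = KEY(0, 90, 0);

	BUG_ON(!__bch_cut_front(&where, &k));
	BUG_ON(KEY_OFFSET(&k) != 100);
	BUG_ON(KEY_SIZE(&k) != 10);
}
#endif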

bool __bch_cut_back(const struct bkey *where, struct bkey *k)
{
	unsigned int len = 0;

	if (bkey_cmp(where, k) >= 0)
		return false;

	BUG_ON(KEY_INODE(where) != KEY_INODE(k));

	if (bkey_cmp(where, &START_KEY(k)) > 0)
		len = KEY_OFFSET(where) - KEY_START(k);

	bkey_copy_key(k, where);

	BUG_ON(len > KEY_SIZE(k));
	SET_KEY_SIZE(k, len);
	return true;
}
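
/*
 * Worked example, symmetric to __bch_cut_front(): cutting the extent
 * [80, 100) (size 20, ending at offset 100) back at offset 90 computes
 * len = 90 - 80 = 10 and copies `where' into the key, leaving [80, 90).
 * The pointers stay untouched because the start of the extent doesn't move.
 */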

/* Auxiliary search trees */

/* 32 bits total: */
#define BKEY_MID_BITS		3
#define BKEY_EXPONENT_BITS	7
#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)

struct bkey_float {
	unsigned int	exponent:BKEY_EXPONENT_BITS;
	unsigned int	m:BKEY_MID_BITS;
	unsigned int	mantissa:BKEY_MANTISSA_BITS;
} __packed;
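
/*
 * Layout sketch, derived from the defines above: BKEY_MANTISSA_BITS is
 * 32 - 3 - 7 = 22, so a bkey_float packs into a single 32-bit word:
 * 7 bits of exponent, a 3-bit offset of the key within its cacheline
 * (in units of u64s), and a 22-bit mantissa masked by BKEY_MANTISSA_MASK
 * ((1 << 22) - 1 = 0x3fffff).
 */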

/*
 * BSET_CACHELINE was originally intended to match the hardware cacheline
 * size - it used to be 64, but I realized the lookup code would touch
 * slightly less memory if it was 128.
 *
 * It defines the number of bytes (in struct bset) per struct bkey_float in
 * the auxiliary search tree - when we're done searching the bkey_float tree
 * we have this many bytes left that we do a linear search over.
 *
 * Since (after level 5) every level of the bset_tree is on a new cacheline,
 * we're touching one fewer cacheline in the bset tree in exchange for one more
 * cacheline in the linear search - but the linear search might stop before it
 * gets to the second cacheline.
 */

#define BSET_CACHELINE		128

/* Space required for the btree node keys */
static inline size_t btree_keys_bytes(struct btree_keys *b)
{
	return PAGE_SIZE << b->page_order;
}

static inline size_t btree_keys_cachelines(struct btree_keys *b)
{
	return btree_keys_bytes(b) / BSET_CACHELINE;
}

/* Space required for the auxiliary search trees */
static inline size_t bset_tree_bytes(struct btree_keys *b)
{
	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
}

/* Space required for the prev pointers */
static inline size_t bset_prev_bytes(struct btree_keys *b)
{
	return btree_keys_cachelines(b) * sizeof(uint8_t);
}
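
/*
 * Example sizing (assuming 4 KiB pages): with page_order = 3 a btree node
 * holds 32 KiB of keys, i.e. 256 chunks of BSET_CACHELINE bytes, so we
 * reserve 256 * sizeof(struct bkey_float) = 1 KiB for the search trees
 * and 256 bytes for the prev arrays.
 */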

/* Memory allocation */

void bch_btree_keys_free(struct btree_keys *b)
{
	struct bset_tree *t = b->set;

	if (bset_prev_bytes(b) < PAGE_SIZE)
		kfree(t->prev);
	else
		free_pages((unsigned long) t->prev,
			   get_order(bset_prev_bytes(b)));

	if (bset_tree_bytes(b) < PAGE_SIZE)
		kfree(t->tree);
	else
		free_pages((unsigned long) t->tree,
			   get_order(bset_tree_bytes(b)));

	free_pages((unsigned long) t->data, b->page_order);

	t->prev = NULL;
	t->tree = NULL;
	t->data = NULL;
}

int bch_btree_keys_alloc(struct btree_keys *b,
			 unsigned int page_order,
			 gfp_t gfp)
{
	struct bset_tree *t = b->set;

	BUG_ON(t->data);

	b->page_order = page_order;

	t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order);
	if (!t->data)
		goto err;

	t->tree = bset_tree_bytes(b) < PAGE_SIZE
		? kmalloc(bset_tree_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
	if (!t->tree)
		goto err;

	t->prev = bset_prev_bytes(b) < PAGE_SIZE
		? kmalloc(bset_prev_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
	if (!t->prev)
		goto err;

	return 0;
err:
	bch_btree_keys_free(b);
	return -ENOMEM;
}

void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
			 bool *expensive_debug_checks)
{
	b->ops = ops;
	b->expensive_debug_checks = expensive_debug_checks;
	b->nsets = 0;
	b->last_set_unwritten = 0;

	/*
	 * struct btree_keys is embedded in struct btree, and struct
	 * bset_tree is embedded into struct btree_keys. They are all
	 * initialized as 0 by kzalloc() in mca_bucket_alloc(), and
	 * b->set[0].data is allocated in bch_btree_keys_alloc(), so we
	 * don't have to initialize b->set[].size and b->set[].data here
	 * any more.
	 */
}

/* Binary tree stuff for auxiliary search trees */

/*
 * Return the array index that follows j in an in-order traversal of a
 * binary tree stored in a linear array
 */
static unsigned int inorder_next(unsigned int j, unsigned int size)
{
	if (j * 2 + 1 < size) {
		j = j * 2 + 1;

		while (j * 2 < size)
			j *= 2;
	} else
		j >>= ffz(j) + 1;

	return j;
}
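
#if 0
/*
 * Illustrative sketch only, kept under #if 0 like inorder_test() below:
 * for a tree stored at array indices 1..7 (size == 8), repeated calls
 * visit the nodes in order 4, 2, 5, 1, 6, 3, 7 and then return 0.
 */
static void inorder_next_example(void)
{
	unsigned int j = inorder_next(0, 8);	/* leftmost node: 4 */

	while (j) {
		pr_info("%u\n", j);
		j = inorder_next(j, 8);
	}
}
#endif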

/*
 * Return the array index that precedes j in an in-order traversal of a
 * binary tree stored in a linear array
 */
static unsigned int inorder_prev(unsigned int j, unsigned int size)
{
	if (j * 2 < size) {
		j = j * 2;

		while (j * 2 + 1 < size)
			j = j * 2 + 1;
	} else
		j >>= ffs(j);

	return j;
}

/*
 * I have no idea why this code works... and I'm the one who wrote it
 *
 * However, I do know what it does:
 * Given a binary tree constructed in an array (i.e. how you normally implement
 * a heap), it converts a node in the tree - referenced by array index - to the
 * index it would have if you did an inorder traversal.
 *
 * Also tested for every j, for sizes up to somewhere around 6 million.
 *
 * The binary tree starts at array index 1, not 0
 * extra is a function of size:
 * extra = (size - rounddown_pow_of_two(size - 1)) << 1;
 */
static unsigned int __to_inorder(unsigned int j,
				 unsigned int size,
				 unsigned int extra)
{
	unsigned int b = fls(j);
	unsigned int shift = fls(size - 1) - b;

	j ^= 1U << (b - 1);
	j <<= 1;
	j |= 1;
	j <<= shift;

	if (j > extra)
		j -= (j - extra) >> 1;

	return j;
}
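
/*
 * Worked example with size = 7 (tree nodes 1..6), so
 * extra = (7 - rounddown_pow_of_two(6)) << 1 = 6:
 * __to_inorder(3, 7, 6) has b = 2 and shift = 1, and j evolves
 * 3 -> 1 -> 2 -> 3 -> 6; node 3 is indeed visited sixth in the in-order
 * walk 4, 2, 5, 1, 6, 3. The j > extra adjustment only fires for nodes
 * past the tree's last full level.
 */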

/*
 * Return the cacheline index in bset_tree->data, where j is the index
 * from the linear array which stores the auxiliary binary tree
 */
static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
{
	return __to_inorder(j, t->size, t->extra);
}

static unsigned int __inorder_to_tree(unsigned int j,
				      unsigned int size,
				      unsigned int extra)
{
	unsigned int shift;

	if (j > extra)
		j += j - extra;

	shift = ffs(j);

	j >>= shift;
	j |= roundup_pow_of_two(size) >> shift;

	return j;
}
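
/*
 * Inverse of the __to_inorder() example above (size = 7, extra = 6):
 * __inorder_to_tree(6, 7, 6) leaves j alone (6 is not > extra), computes
 * shift = ffs(6) = 2, then j >> 2 = 1 and 1 | (roundup_pow_of_two(7) >> 2)
 * = 1 | 2 = 3, recovering tree node 3 for the sixth in-order key.
 */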

/*
 * Return an index into the linear array which stores the auxiliary binary
 * tree; j is the cacheline index of t->data.
 */
static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
{
	return __inorder_to_tree(j, t->size, t->extra);
}

#if 0
void inorder_test(void)
{
	unsigned long done = 0;
	ktime_t start = ktime_get();

	for (unsigned int size = 2;
	     size < 65536000;
	     size++) {
		unsigned int extra =
			(size - rounddown_pow_of_two(size - 1)) << 1;
		unsigned int i = 1, j = rounddown_pow_of_two(size - 1);

		if (!(size % 4096))
			pr_notice("loop %u, %llu per us\n", size,
				  done / ktime_us_delta(ktime_get(), start));

		while (1) {
			if (__inorder_to_tree(i, size, extra) != j)
				panic("size %10u j %10u i %10u", size, j, i);

			if (__to_inorder(j, size, extra) != i)
				panic("size %10u j %10u i %10u", size, j, i);

			if (j == rounddown_pow_of_two(size) - 1)
				break;

			BUG_ON(inorder_prev(inorder_next(j, size), size) != j);

			j = inorder_next(j, size);
			i++;
		}

		done += size - 1;
	}
}
#endif

/*
 * Cacheline/offset <-> bkey pointer arithmetic:
 *
 * t->tree is a binary search tree in an array; each node corresponds to a key
 * in one cacheline in t->set (BSET_CACHELINE bytes).
 *
 * This means we don't have to store the full index of the key that a node in
 * the binary tree points to; to_inorder() gives us the cacheline, and then
 * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
 *
 * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
 * make this work.
 *
 * To construct the bfloat for an arbitrary key we need to know what the key
 * immediately preceding it is: we have to check if the two keys differ in the
 * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
 * of the previous key so we can walk backwards to it from t->tree[j]'s key.
 */

static struct bkey *cacheline_to_bkey(struct bset_tree *t,
				      unsigned int cacheline,
				      unsigned int offset)
{
	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
}

static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
{
	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}

static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
					     unsigned int cacheline,
					     struct bkey *k)
{
	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
}

static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
{
	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
}

static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
{
	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
}

/*
 * For the write set - the one we're currently inserting keys into - we don't
 * maintain a full search tree, we just keep a simple lookup table in t->prev.
 */
static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
{
	return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
}

static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
{
	low >>= shift;
	low |= (high << 1) << (63U - shift);
	return low;
}
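
/*
 * Example: shrd128(0x1, 0x0, 4) returns 1ULL << 60, the low 64 bits of
 * the 128-bit value (high:low) >> 4. Splitting the high word's shift as
 * (high << 1) << (63 - shift) avoids an undefined 64-bit shift when
 * shift == 0.
 */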

/*
 * Calculate the mantissa value for struct bkey_float.
 * If the most significant bit of f->exponent is not set, then
 * - f->exponent >> 6 is 0
 * - p[0] points to bkey->low
 * - p[-1] borrows bits from KEY_INODE() of bkey->high
 * If the most significant bit of f->exponent is set, then
 * - f->exponent >> 6 is 1
 * - p[0] points to bits from KEY_INODE() of bkey->high
 * - p[-1] points to other bits from KEY_INODE() of
 *   bkey->high too.
 * See make_bfloat() to check when the most significant bit of f->exponent
 * is set or not.
 */
static inline unsigned int bfloat_mantissa(const struct bkey *k,
					   struct bkey_float *f)
{
	const uint64_t *p = &k->low - (f->exponent >> 6);

	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
}

static void make_bfloat(struct bset_tree *t, unsigned int j)
{
	struct bkey_float *f = &t->tree[j];
	struct bkey *m = tree_to_bkey(t, j);
	struct bkey *p = tree_to_prev_bkey(t, j);

	struct bkey *l = is_power_of_2(j)
		? t->data->start
		: tree_to_prev_bkey(t, j >> ffs(j));

	struct bkey *r = is_power_of_2(j + 1)
		? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end))
		: tree_to_bkey(t, j >> (ffz(j) + 1));

	BUG_ON(m < l || m > r);
	BUG_ON(bkey_next(p) != m);

	/*
	 * If l and r have different KEY_INODE values (different backing
	 * devices), f->exponent records how many least significant bits
	 * are different in KEY_INODE values and sets the most significant
	 * bit to 1 (by +64).
	 * If l and r have the same KEY_INODE value, f->exponent records
	 * how many bits differ in the least significant bits of bkey->low.
	 * See bfloat_mantissa() for how the most significant bit of
	 * f->exponent is used to calculate the bfloat mantissa value.
	 */
	if (KEY_INODE(l) != KEY_INODE(r))
		f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64;
	else
		f->exponent = fls64(r->low ^ l->low);

	f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0);

	/*
	 * Setting f->exponent = 127 flags this node as failed, and causes the
	 * lookup code to fall back to comparing against the original key.
	 */

	if (bfloat_mantissa(m, f) != bfloat_mantissa(p, f))
		f->mantissa = bfloat_mantissa(m, f) - 1;
	else
		f->exponent = 127;
}

static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
{
	if (t != b->set) {
		unsigned int j = roundup(t[-1].size,
					 64 / sizeof(struct bkey_float));

		t->tree = t[-1].tree + j;
		t->prev = t[-1].prev + j;
	}

	while (t < b->set + MAX_BSETS)
		t++->size = 0;
}
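
/*
 * Note: all bsets in a node share the single tree/prev allocation made in
 * bch_btree_keys_alloc(); each new set's arrays begin where the previous
 * set's ended, rounded up to 16 entries (64 / sizeof(struct bkey_float))
 * so that each set's bkey_float tree starts on a fresh cacheline.
 */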

static void bch_bset_build_unwritten_tree(struct btree_keys *b)
{
	struct bset_tree *t = bset_tree_last(b);

	BUG_ON(b->last_set_unwritten);
	b->last_set_unwritten = 1;

	bset_alloc_tree(b, t);

	if (t->tree != b->set->tree + btree_keys_cachelines(b)) {
		t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
		t->size = 1;
	}
}

void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
{
	if (i != b->set->data) {
		b->set[++b->nsets].data = i;
		i->seq = b->set->data->seq;
	} else
		get_random_bytes(&i->seq, sizeof(uint64_t));

	i->magic = magic;
	i->version = 0;
	i->keys = 0;

	bch_bset_build_unwritten_tree(b);
}

/*
 * Build the auxiliary binary tree 'struct bset_tree *t'; this tree is used to
 * accelerate bkey search in a btree node (pointed to by bset_tree->data in
 * memory). After searching the auxiliary tree by calling bset_search_tree(),
 * a struct bset_search_iter is returned which indicates the range [l, r] in
 * bset_tree->data where the bkey being searched for might be. A following
 * linear comparison then does the exact search; see __bch_bset_search() for
 * how the auxiliary tree is used.
 */
void bch_bset_build_written_tree(struct btree_keys *b)
{
	struct bset_tree *t = bset_tree_last(b);
	struct bkey *prev = NULL, *k = t->data->start;
	unsigned int j, cacheline = 1;

	b->last_set_unwritten = 0;

	bset_alloc_tree(b, t);

	t->size = min_t(unsigned int,
			bkey_to_cacheline(t, bset_bkey_last(t->data)),
			b->set->tree + btree_keys_cachelines(b) - t->tree);

	if (t->size < 2) {
		t->size = 0;
		return;
	}

	t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;

	/* First we figure out where the first key in each cacheline is */
	for (j = inorder_next(0, t->size);
	     j;
	     j = inorder_next(j, t->size)) {
		while (bkey_to_cacheline(t, k) < cacheline)
			prev = k, k = bkey_next(k);

		t->prev[j] = bkey_u64s(prev);
		t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
	}

	while (bkey_next(k) != bset_bkey_last(t->data))
		k = bkey_next(k);

	t->end = *k;

	/* Then we build the tree */
	for (j = inorder_next(0, t->size);
	     j;
	     j = inorder_next(j, t->size))
		make_bfloat(t, j);
}

/* Insert */

void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
{
	struct bset_tree *t;
	unsigned int inorder, j = 1;

	for (t = b->set; t <= bset_tree_last(b); t++)
		if (k < bset_bkey_last(t->data))
			goto found_set;

	BUG();
found_set:
	if (!t->size || !bset_written(b, t))
		return;

	inorder = bkey_to_cacheline(t, k);

	if (k == t->data->start)
		goto fix_left;

	if (bkey_next(k) == bset_bkey_last(t->data)) {
		t->end = *k;
		goto fix_right;
	}

	j = inorder_to_tree(inorder, t);

	if (j &&
	    j < t->size &&
	    k == tree_to_bkey(t, j))
fix_left:	do {
			make_bfloat(t, j);
			j = j * 2;
		} while (j < t->size);

	j = inorder_to_tree(inorder + 1, t);

	if (j &&
	    j < t->size &&
	    k == tree_to_prev_bkey(t, j))
fix_right:	do {
			make_bfloat(t, j);
			j = j * 2 + 1;
		} while (j < t->size);
}

static void bch_bset_fix_lookup_table(struct btree_keys *b,
				      struct bset_tree *t,
				      struct bkey *k)
{
	unsigned int shift = bkey_u64s(k);
	unsigned int j = bkey_to_cacheline(t, k);

	/* We're getting called from btree_split() or btree_gc, just bail out */
	if (!t->size)
		return;

	/*
	 * k is the key we just inserted; we need to find the entry in the
	 * lookup table for the first key that is strictly greater than k:
	 * it's either k's cacheline or the next one
	 */
	while (j < t->size &&
	       table_to_bkey(t, j) <= k)
		j++;

	/*
	 * Adjust all the lookup table entries, and find a new key for any that
	 * have gotten too big
	 */
	for (; j < t->size; j++) {
		t->prev[j] += shift;

		if (t->prev[j] > 7) {
			k = table_to_bkey(t, j - 1);

			while (k < cacheline_to_bkey(t, j, 0))
				k = bkey_next(k);

			t->prev[j] = bkey_to_cacheline_offset(t, j, k);
		}
	}

	if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree)
		return;

	/* Possibly add a new entry to the end of the lookup table */

	for (k = table_to_bkey(t, t->size - 1);
	     k != bset_bkey_last(t->data);
	     k = bkey_next(k))
		if (t->size == bkey_to_cacheline(t, k)) {
			t->prev[t->size] =
				bkey_to_cacheline_offset(t, t->size, k);
			t->size++;
		}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
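/*
 * A hedged worked example of the bookkeeping above: the invariant is that
 * t->prev[j] holds the offset, in u64s, of cacheline j's first key from the
 * start of that cacheline, i.e.
 *
 *	table_to_bkey(t, j) == cacheline_to_bkey(t, j, t->prev[j]);
 *
 * Suppose cacheline 3's first key used to sit 2 u64s past the cacheline
 * boundary (t->prev[3] == 2); inserting a 3-u64 key anywhere before it
 * shifts it to offset 5. Once an offset would grow past 7, the entry is
 * recomputed by walking forward from table_to_bkey(t, j - 1) to the first
 * key at or after the cacheline boundary, restoring the invariant.
 */
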
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) * Tries to merge l and r: l should be lower than r
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) * Returns true if we were able to merge. If we did merge, l will be the merged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) * key, r will be untouched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) if (!b->ops->key_merge)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * Generic header checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * Assumes left and right are in order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * Left and right must be exactly aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (!bch_bkey_equal_header(l, r) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) bkey_cmp(l, &START_KEY(r)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return b->ops->key_merge(b, l, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
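/*
 * A hedged example of the precondition above, using the bcache extent
 * convention that KEY_OFFSET() is a key's *end* and KEY_SIZE() its length,
 * so KEY(1, 16, 8) covers sectors 8..16 of inode 1. A sketch, not built
 * here:
 *
 *	struct bkey l = KEY(1, 16, 8);	// sectors  8..16
 *	struct bkey r = KEY(1, 24, 8);	// sectors 16..24
 *
 *	// bkey_cmp(&l, &START_KEY(&r)) == 0 and the headers match, so if
 *	// ops->key_merge succeeds, l becomes KEY(1, 24, 16) and r is left
 *	// untouched.
 *	if (bch_bkey_try_merge(b, &l, &r))
 *		pr_debug("merged: %llu:%llu\n", KEY_INODE(&l), KEY_OFFSET(&l));
 */
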
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) void bch_bset_insert(struct btree_keys *b, struct bkey *where,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) struct bkey *insert)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) struct bset_tree *t = bset_tree_last(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) BUG_ON(!b->last_set_unwritten);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) BUG_ON(bset_byte_offset(b, t->data) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) PAGE_SIZE << b->page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) memmove((uint64_t *) where + bkey_u64s(insert),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) where,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) (void *) bset_bkey_last(t->data) - (void *) where);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) t->data->keys += bkey_u64s(insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) bkey_copy(where, insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) bch_bset_fix_lookup_table(b, t, where);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) struct bkey *replace_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct bset *i = bset_tree_last(b)->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) struct bkey *m, *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) struct bkey preceding_key_on_stack = ZERO_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) struct bkey *preceding_key_p = &preceding_key_on_stack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) /*
	 * If k has a preceding key, preceding_key_p will be set to the
	 * address of k's preceding key; otherwise preceding_key_p will
	 * be set to NULL inside preceding_key().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) if (b->ops->is_extents)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) preceding_key(&START_KEY(k), &preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) preceding_key(k, &preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) m = bch_btree_iter_init(b, &iter, preceding_key_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (b->ops->insert_fixup(b, k, &iter, replace_key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) status = BTREE_INSERT_STATUS_INSERT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) while (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) prev = m, m = bkey_next(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
	/* prev is in the tree; if we can merge with it, we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) status = BTREE_INSERT_STATUS_BACK_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) if (prev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) bch_bkey_try_merge(b, prev, k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) goto merged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) status = BTREE_INSERT_STATUS_OVERWROTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) if (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) goto copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) status = BTREE_INSERT_STATUS_FRONT_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (m != bset_bkey_last(i) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) bch_bkey_try_merge(b, k, m))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) goto copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) bch_bset_insert(b, m, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) copy: bkey_copy(m, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) merged:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) return status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
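/*
 * A hypothetical caller-side sketch (the real callers live in btree.c): the
 * status distinguishes a plain insert from the two merge cases, and from
 * insert_fixup() consuming the key entirely.
 *
 *	unsigned int status = bch_btree_insert_key(b, k, replace_key);
 *
 *	if (status != BTREE_INSERT_STATUS_NO_INSERT) {
 *		// key (possibly merged into a neighbour) now lives in the
 *		// unwritten bset: account for it and mark the node dirty
 *	}
 */
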
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) /* Lookup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) struct bset_search_iter {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) struct bkey *l, *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) unsigned int li = 0, ri = t->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) while (li + 1 != ri) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) unsigned int m = (li + ri) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (bkey_cmp(table_to_bkey(t, m), search) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) ri = m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) li = m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) return (struct bset_search_iter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) table_to_bkey(t, li),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
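/*
 * The loop above is an invariant-based binary search: ri only ever moves to
 * an entry strictly greater than the search key, li to one less than or
 * equal to it (li == 0 is taken on faith), so on exit li names the last
 * cacheline that can contain the answer and ri the first one past it. The
 * same pattern over a plain sorted array, as a standalone sketch:
 *
 *	// smallest index m in 1..n with a[m] > x (n if there is none)
 *	unsigned int li = 0, ri = n;
 *
 *	while (li + 1 != ri) {
 *		unsigned int m = (li + ri) >> 1;
 *
 *		if (a[m] > x)
 *			ri = m;
 *		else
 *			li = m;
 *	}
 */
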
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) static struct bset_search_iter bset_search_tree(struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) struct bkey *l, *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) struct bkey_float *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) unsigned int inorder, j, n = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) unsigned int p = n << 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (p < t->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) prefetch(&t->tree[p]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) j = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) f = &t->tree[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (likely(f->exponent != 127)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) if (f->mantissa >= bfloat_mantissa(search, f))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) n = j * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) n = j * 2 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) if (bkey_cmp(tree_to_bkey(t, j), search) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) n = j * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) n = j * 2 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) } while (n < t->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) inorder = to_inorder(j, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) * n would have been the node we recursed to - the low bit tells us if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) * we recursed left or recursed right.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (n & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) l = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (++inorder != t->size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) f = &t->tree[inorder_next(j, t->size)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) r = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) r = bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) r = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (--inorder) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) f = &t->tree[inorder_prev(j, t->size)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) l = cacheline_to_bkey(t, inorder, f->m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) l = t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) return (struct bset_search_iter) {l, r};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
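/*
 * A hedged reading of the descent above: t->tree[] is a complete binary
 * tree laid out breadth-first (node j's children are 2*j and 2*j + 1), and
 * each bkey_float compresses its key into a small integer mantissa so a
 * level normally costs one integer compare; exponent == 127 marks floats
 * that could not encode their key and fall back to a full bkey_cmp(). Once
 * the walk falls off the tree, the low bit of n replays the last step:
 *
 *	n = j * 2;	// went left:  search < key(j),  key(j) becomes r
 *	n = j * 2 + 1;	// went right: key(j) <= search, key(j) becomes l
 *
 * with the neighbouring node in inorder sequence supplying the other end of
 * the returned range.
 */
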
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) const struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) struct bset_search_iter i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) /*
	 * First we search for a cacheline, then we do a linear search
	 * within that cacheline.
	 *
	 * To search for the cacheline, there are three different possibilities:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * * The set is too small to have a search tree, so we just do a linear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * search over the whole set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * * The set is the one we're currently inserting into; keeping a full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * auxiliary search tree up to date would be too expensive, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * use a much simpler lookup table to do a binary search -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * bset_search_write_set().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * * Or we use the auxiliary search tree we constructed earlier -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * bset_search_tree()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) if (unlikely(!t->size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) i.l = t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) i.r = bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) } else if (bset_written(b, t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * Each node in the auxiliary search tree covers a certain range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * of bits, and keys above and below the set it covers might
		 * differ outside those bits - so we have to special-case the
		 * start and the end; handle that here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) if (unlikely(bkey_cmp(search, &t->end) >= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return bset_bkey_last(t->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) if (unlikely(bkey_cmp(search, t->data->start) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) return t->data->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) i = bset_search_tree(t, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) BUG_ON(!b->nsets &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) i = bset_search_write_set(t, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) if (btree_keys_expensive_checks(b)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) BUG_ON(bset_written(b, t) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) i.l != t->data->start &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) bkey_cmp(tree_to_prev_bkey(t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) search) > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) BUG_ON(i.r != bset_bkey_last(t->data) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) bkey_cmp(i.r, search) <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) while (likely(i.l != i.r) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) bkey_cmp(i.l, search) <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) i.l = bkey_next(i.l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) return i.l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
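/*
 * A minimal usage sketch, assuming bch_bset_search(), the wrapper that
 * __bch_btree_iter_init() below calls (it falls back to the start of the
 * set for a NULL search): find the first key strictly greater than
 * 'search' in the newest bset.
 *
 *	struct bset_tree *t = bset_tree_last(b);
 *	struct bkey *k = bch_bset_search(b, t, search);
 *
 *	if (k != bset_bkey_last(t->data))
 *		pr_debug("next key %llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
 */
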
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) /* Btree iterator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) struct btree_iter_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) static inline bool btree_iter_cmp(struct btree_iter_set l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) struct btree_iter_set r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) return bkey_cmp(l.k, r.k) > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) static inline bool btree_iter_end(struct btree_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) return !iter->used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) struct bkey *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (k != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) BUG_ON(!heap_add(iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) ((struct btree_iter_set) { k, end }),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) btree_iter_cmp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) struct bkey *search,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) struct bset_tree *start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) struct bkey *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) iter->size = ARRAY_SIZE(iter->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) iter->used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) #ifdef CONFIG_BCACHE_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) iter->b = b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) for (; start <= bset_tree_last(b); start++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) ret = bch_bset_search(b, start, search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) struct bkey *bch_btree_iter_init(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) struct bkey *search)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) return __bch_btree_iter_init(b, iter, search, b->set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) btree_iter_cmp_fn *cmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) struct btree_iter_set b __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) struct bkey *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) if (!btree_iter_end(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) bch_btree_iter_next_check(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) ret = iter->data->k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) iter->data->k = bkey_next(iter->data->k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) if (iter->data->k > iter->data->end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) WARN_ONCE(1, "bset was corrupt!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) iter->data->k = iter->data->end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (iter->data->k == iter->data->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) heap_pop(iter, b, cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) heap_sift(iter, 0, cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct bkey *bch_btree_iter_next(struct btree_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) return __bch_btree_iter_next(iter, btree_iter_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
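/*
 * A minimal iteration sketch: a NULL search starts from the beginning, and
 * the heap merges all of the node's bsets so keys come back in sorted
 * order across sets.
 *
 *	struct btree_iter iter;
 *	struct bkey *k;
 *
 *	bch_btree_iter_init(b, &iter, NULL);
 *	while ((k = bch_btree_iter_next(&iter)))
 *		pr_debug("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
 */
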
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) struct btree_keys *b, ptr_filter_fn fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) struct bkey *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) ret = bch_btree_iter_next(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) } while (ret && fn(b, ret));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
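/*
 * Usage sketch: 'fn' returns true for keys the caller wants skipped, so
 * passing bch_ptr_bad (as callers in btree.c do) yields only keys whose
 * pointers are still live:
 *
 *	while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad)))
 *		...;
 */
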
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) /* Mergesort */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) void bch_bset_sort_state_free(struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) mempool_exit(&state->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) int bch_bset_sort_state_init(struct bset_sort_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) unsigned int page_order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) spin_lock_init(&state->time.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) state->page_order = page_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) state->crit_factor = int_sqrt(1 << page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return mempool_init_page_pool(&state->pool, 1, page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
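/*
 * Pairing sketch: the mempool keeps one reserve allocation of 2^page_order
 * pages so the fallback path in __btree_sort() below can always make
 * progress under memory pressure. 'page_order' here is whatever the caller
 * sizes its nodes at:
 *
 *	struct bset_sort_state state;
 *	int ret = bch_bset_sort_state_init(&state, page_order);
 *
 *	if (ret)
 *		return ret;
 *	...
 *	bch_bset_sort_state_free(&state);
 */
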
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) static void btree_mergesort(struct btree_keys *b, struct bset *out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) bool fixup, bool remove_stale)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct bkey *k, *last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) BKEY_PADDED(k) tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) ? bch_ptr_bad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) : bch_ptr_invalid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /* Heapify the iterator, using our comparison function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) for (i = iter->used / 2 - 1; i >= 0; --i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) heap_sift(iter, i, b->ops->sort_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) while (!btree_iter_end(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (b->ops->sort_fixup && fixup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) k = b->ops->sort_fixup(iter, &tmp.k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) k = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (!k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) if (bad(b, k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) if (!last) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) last = out->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) bkey_copy(last, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) } else if (!bch_bkey_try_merge(b, last, k)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) last = bkey_next(last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) bkey_copy(last, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
	pr_debug("sorted %u keys\n", out->keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) unsigned int start, unsigned int order, bool fixup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) uint64_t start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) bool used_mempool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) if (!out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) struct page *outp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) BUG_ON(order > state->page_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) outp = mempool_alloc(&state->pool, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) out = page_address(outp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) used_mempool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) order = state->page_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) start_time = local_clock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) btree_mergesort(b, out, iter, fixup, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) b->nsets = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) if (!start && order == b->page_order) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) /*
		 * Our temporary buffer is the same size as the btree node's
		 * buffer, so we can just swap buffers instead of doing a big
		 * memcpy().
		 *
		 * Don't worry even if 'out' was allocated from the mempool:
		 * it can still be swapped here, because state->pool is a page
		 * mempool created by mempool_init_page_pool(), which really
		 * does allocate whole pages with alloc_pages().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) out->magic = b->set->data->magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) out->seq = b->set->data->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) out->version = b->set->data->version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) swap(out, b->set->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) b->set[start].data->keys = out->keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) memcpy(b->set[start].data->start, out->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) (void *) bset_bkey_last(out) - (void *) out->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) if (used_mempool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) mempool_free(virt_to_page(out), &state->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) free_pages((unsigned long) out, order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) bch_bset_build_written_tree(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (!start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) bch_time_stats_update(&state->time, start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) size_t order = b->page_order, keys = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) int oldsize = bch_count_data(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) for (i = start; i <= b->nsets; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) keys += b->set[i].data->keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) order = get_order(__set_bytes(b->set->data, keys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) __btree_sort(b, &iter, start, order, false, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) void bch_btree_sort_and_fix_extents(struct btree_keys *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) struct btree_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) __btree_sort(b, iter, 0, b->page_order, true, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) uint64_t start_time = local_clock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) struct btree_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) bch_btree_iter_init(b, &iter, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) btree_mergesort(b, new->set->data, &iter, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) bch_time_stats_update(&state->time, start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) new->set->size = 0; // XXX: why?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) #define SORT_CRIT (4096 / sizeof(uint64_t))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) unsigned int crit = SORT_CRIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) /* Don't sort if nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) if (!b->nsets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) for (i = b->nsets - 1; i >= 0; --i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) crit *= state->crit_factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) if (b->set[i].data->keys < crit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) bch_btree_sort_partial(b, i, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) /* Sort if we'd overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) if (b->nsets + 1 == MAX_BSETS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) bch_btree_sort(b, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) bch_bset_build_written_tree(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
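/*
 * A worked example of the heuristic above, assuming 16k btree nodes
 * (page_order == 2, so crit_factor == int_sqrt(4) == 2) and SORT_CRIT ==
 * 4096 / 8 == 512: scanning sets from newest to oldest, the thresholds are
 * 1024, 2048, 4096, ... u64s of keys, so a small new set is cheaply folded
 * into its neighbours while a big old set is resorted only once everything
 * newer has grown comparably large.
 */
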
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) for (i = 0; i <= b->nsets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) struct bset_tree *t = &b->set[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) size_t bytes = t->data->keys * sizeof(uint64_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) size_t j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) if (bset_written(b, t)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) stats->sets_written++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) stats->bytes_written += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) stats->floats += t->size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) for (j = 1; j < t->size; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) if (t->tree[j].exponent == 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) stats->failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) stats->sets_unwritten++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) stats->bytes_unwritten += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }