// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Uses a block device as cache for other block devices; optimized for SSDs.
 * All allocation is done in buckets, which should match the erase block size
 * of the device.
 *
 * Buckets containing cached data are kept on a heap sorted by priority;
 * bucket priority is increased on cache hit, and periodically all the buckets
 * on the heap have their priority scaled down. Currently this is used only as
 * an LRU, but in the future it should allow for more intelligent heuristics.
 *
 * Buckets have an 8 bit generation counter; a bucket is freed by incrementing
 * its generation, which invalidates all pointers into it. Garbage collection
 * is used to remove these stale pointers.
 *
 * Indexing is done via a btree; nodes are not necessarily fully sorted.
 * Rather, as keys are inserted we only sort the pages that have not yet been
 * written. When garbage collection is run, we resort the entire node.
 *
 * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst.
 */
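
/*
 * For reference, a rough sketch of how the generation scheme is consumed
 * below (the real helpers live in bcache.h; this is illustrative only):
 *
 *	stale = gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
 *
 * i.e. each pointer records the bucket generation it was minted with, so
 * bumping a bucket's gen atomically invalidates every pointer into it, and
 * gc later drops those stale pointers from the btree.
 */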

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "writeback.h"

static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
	i->k = bkey_next(i->k);

	if (i->k == i->end)
		*i = iter->data[--iter->used];
}

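/*
 * Heap comparator for non-extent keys: returns true if l > r; if l == r,
 * returns true if l is older than r, so keys that compare equal across bsets
 * are processed newest to oldest (see bch_extent_sort_cmp() below for the
 * rationale).
 */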
static bool bch_key_sort_cmp(struct btree_iter_set l,
			     struct btree_iter_set r)
{
	int64_t c = bkey_cmp(l.k, r.k);

	return c ? c > 0 : l.k < r.k;
}

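/*
 * Returns true if any pointer in @k is unusable: the extent would run past
 * the end of its bucket, or the bucket number falls outside the device's
 * [first_bucket, nbuckets) range.
 */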
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = PTR_CACHE(c, k, i);
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
			    bucket < ca->sb.first_bucket ||
			    bucket >= ca->sb.nbuckets)
				return true;
		}

	return false;
}

/* Common among btree and extent ptrs */

static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = PTR_CACHE(c, k, i);
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
				return "bad, length too big";
			if (bucket < ca->sb.first_bucket)
				return "bad, short offset";
			if (bucket >= ca->sb.nbuckets)
				return "bad, offset past end of device";
			if (ptr_stale(c, k, i))
				return "stale";
		}

	if (!bkey_cmp(k, &ZERO_KEY))
		return "bad, null key";
	if (!KEY_PTRS(k))
		return "bad, no pointers";
	if (!KEY_SIZE(k))
		return "zeroed key";
	return "";
}

void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
	unsigned int i = 0;
	char *out = buf, *end = buf + size;

#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))

	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));

	for (i = 0; i < KEY_PTRS(k); i++) {
		if (i)
			p(", ");

		if (PTR_DEV(k, i) == PTR_CHECK_DEV)
			p("check dev");
		else
			p("%llu:%llu gen %llu", PTR_DEV(k, i),
			  PTR_OFFSET(k, i), PTR_GEN(k, i));
	}

	p("]");

	if (KEY_DIRTY(k))
		p(" dirty");
	if (KEY_CSUM(k))
		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
}
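
/*
 * Example of the rendered format above, with illustrative values: a dirty
 * two-pointer key might print as
 *
 *	1:5120 len 256 -> [0:1024 gen 4, 1:2048 gen 7] dirty
 */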

static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
	struct btree *b = container_of(keys, struct btree, keys);
	unsigned int j;
	char buf[80];

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_cont(" %s", buf);

	for (j = 0; j < KEY_PTRS(k); j++) {
		size_t n = PTR_BUCKET_NR(b->c, k, j);

		pr_cont(" bucket %zu", n);
		if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
			pr_cont(" prio %i",
				PTR_BUCKET(b->c, k, j)->prio);
	}

	pr_cont(" %s\n", bch_ptr_status(b->c, k));
}

/* Btree ptrs */

bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_btree_ptr_invalid(b->c, k);
}

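/*
 * Best-effort cross-check against the in-memory bucket state: if bucket_lock
 * is contended we skip the check entirely (reporting the key as fine) rather
 * than block here, since this runs with btree node locks held.
 */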
static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
	unsigned int i;
	char buf[80];
	struct bucket *g;

	if (mutex_trylock(&b->c->bucket_lock)) {
		for (i = 0; i < KEY_PTRS(k); i++)
			if (ptr_available(b->c, k, i)) {
				g = PTR_BUCKET(b->c, k, i);

				if (KEY_DIRTY(k) ||
				    g->prio != BTREE_PRIO ||
				    (b->c->gc_mark_valid &&
				     GC_MARK(g) != GC_MARK_METADATA))
					goto err;
			}

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
		  "inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (!bkey_cmp(k, &ZERO_KEY) ||
	    !KEY_PTRS(k) ||
	    bch_ptr_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i) ||
		    ptr_stale(b->c, k, i))
			return true;

	if (expensive_debug_checks(b->c) &&
	    btree_ptr_bad_expensive(b, k))
		return true;

	return false;
}

static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
				       struct bkey *insert,
				       struct btree_iter *iter,
				       struct bkey *replace_key)
{
	struct btree *b = container_of(bk, struct btree, keys);

	if (!KEY_OFFSET(insert))
		btree_current_write(b)->prio_blocked++;

	return false;
}

const struct btree_keys_ops bch_btree_keys_ops = {
	.sort_cmp	= bch_key_sort_cmp,
	.insert_fixup	= bch_btree_ptr_insert_fixup,
	.key_invalid	= bch_btree_ptr_invalid,
	.key_bad	= bch_btree_ptr_bad,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
};

/* Extents */

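/*
 * For extent keys, KEY_OFFSET() is the *end* of the extent and KEY_START()
 * (i.e. KEY_OFFSET() - KEY_SIZE()) is where it begins; the comparisons below
 * mix the two depending on whether we care about where an extent starts or
 * where it ends.
 */
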
/*
 * Returns true if l > r - unless l == r, in which case returns true if l is
 * older than r.
 *
 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
 * equal in different sets, we have to process them newest to oldest.
 */
static bool bch_extent_sort_cmp(struct btree_iter_set l,
				struct btree_iter_set r)
{
	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));

	return c ? c > 0 : l.k < r.k;
}

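/*
 * Resolves overlaps between the heap's top entries before the iterator emits
 * its next key, so the merged output contains no overlapping extents:
 * zero-size keys are skipped, and when the newest key overlaps an older one
 * the older key is trimmed; if the newer key would split it in two, the
 * front fragment is returned via @tmp for the caller to emit.
 */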
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
					  struct bkey *tmp)
{
	while (iter->used > 1) {
		struct btree_iter_set *top = iter->data, *i = top + 1;

		if (iter->used > 2 &&
		    bch_extent_sort_cmp(i[0], i[1]))
			i++;

		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
			break;

		if (!KEY_SIZE(i->k)) {
			sort_key_next(iter, i);
			heap_sift(iter, i - top, bch_extent_sort_cmp);
			continue;
		}

		if (top->k > i->k) {
			if (bkey_cmp(top->k, i->k) >= 0)
				sort_key_next(iter, i);
			else
				bch_cut_front(top->k, i->k);

			heap_sift(iter, i - top, bch_extent_sort_cmp);
		} else {
			/* can't happen because of comparison func */
			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));

			if (bkey_cmp(i->k, top->k) < 0) {
				bkey_copy(tmp, top->k);

				bch_cut_back(&START_KEY(i->k), tmp);
				bch_cut_front(i->k, top->k);
				heap_sift(iter, 0, bch_extent_sort_cmp);

				return tmp;
			} else {
				bch_cut_back(&START_KEY(i->k), top->k);
			}
		}
	}

	return NULL;
}

static void bch_subtract_dirty(struct bkey *k,
			       struct cache_set *c,
			       uint64_t offset,
			       int sectors)
{
	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
					     offset, -sectors);
}

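/*
 * Make room for @insert: walk every existing key that overlaps it, trimming,
 * splitting or dropping them as needed. If @replace_key is non-NULL this is
 * a compare-and-swap style insert: only the region still matching
 * @replace_key may be overwritten, @insert is shrunk to the sectors actually
 * found, and true is returned (meaning "don't insert") if none matched.
 */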
static bool bch_extent_insert_fixup(struct btree_keys *b,
				    struct bkey *insert,
				    struct btree_iter *iter,
				    struct bkey *replace_key)
{
	struct cache_set *c = container_of(b, struct btree, keys)->c;

	uint64_t old_offset;
	unsigned int old_size, sectors_found = 0;

	BUG_ON(!KEY_OFFSET(insert));
	BUG_ON(!KEY_SIZE(insert));

	while (1) {
		struct bkey *k = bch_btree_iter_next(iter);

		if (!k)
			break;

		if (bkey_cmp(&START_KEY(k), insert) >= 0) {
			if (KEY_SIZE(k))
				break;
			else
				continue;
		}

		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
			continue;

		old_offset = KEY_START(k);
		old_size = KEY_SIZE(k);

		/*
		 * We might overlap with 0 size extents; we can't skip these
		 * because if they're in the set we're inserting to we have to
		 * adjust them so they don't overlap with the key we're
		 * inserting. But we don't want to check them for replace
		 * operations.
		 */

		if (replace_key && KEY_SIZE(k)) {
			/*
			 * k might have been split since we inserted/found the
			 * key we're replacing
			 */
			unsigned int i;
			uint64_t offset = KEY_START(k) -
				KEY_START(replace_key);

			/* But it must be a subset of the replace key */
			if (KEY_START(k) < KEY_START(replace_key) ||
			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
				goto check_failed;

			/* We didn't find a key that we were supposed to */
			if (KEY_START(k) > KEY_START(insert) + sectors_found)
				goto check_failed;

			if (!bch_bkey_equal_header(k, replace_key))
				goto check_failed;

			/* skip past gen */
			offset <<= 8;

			BUG_ON(!KEY_PTRS(replace_key));

			for (i = 0; i < KEY_PTRS(replace_key); i++)
				if (k->ptr[i] != replace_key->ptr[i] + offset)
					goto check_failed;

			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
		}

		if (bkey_cmp(insert, k) < 0 &&
		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
			/*
			 * We overlapped in the middle of an existing key: that
			 * means we have to split the old key. But we have to do
			 * slightly different things depending on whether the
			 * old key has been written out yet.
			 */

			struct bkey *top;

			bch_subtract_dirty(k, c, KEY_START(insert),
					   KEY_SIZE(insert));

			if (bkey_written(b, k)) {
				/*
				 * We insert a new key to cover the top of the
				 * old key, and the old key is modified in place
				 * to represent the bottom split.
				 *
				 * It's completely arbitrary whether the new key
				 * is the top or the bottom, but it has to match
				 * up with what btree_sort_fixup() does - it
				 * doesn't check for this kind of overlap, it
				 * depends on us inserting a new key for the top
				 * here.
				 */
				top = bch_bset_search(b, bset_tree_last(b),
						      insert);
				bch_bset_insert(b, top, k);
			} else {
				BKEY_PADDED(key) temp;
				bkey_copy(&temp.key, k);
				bch_bset_insert(b, k, &temp.key);
				top = bkey_next(k);
			}

			bch_cut_front(insert, top);
			bch_cut_back(&START_KEY(insert), k);
			bch_bset_fix_invalidated_key(b, k);
			goto out;
		}

		if (bkey_cmp(insert, k) < 0) {
			bch_cut_front(insert, k);
		} else {
			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
				old_offset = KEY_START(insert);

			if (bkey_written(b, k) &&
			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
				/*
				 * Completely overwrote, so we don't have to
				 * invalidate the binary search tree
				 */
				bch_cut_front(k, k);
			} else {
				__bch_cut_back(&START_KEY(insert), k);
				bch_bset_fix_invalidated_key(b, k);
			}
		}

		bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
	}

check_failed:
	if (replace_key) {
		if (!sectors_found) {
			return true;
		} else if (sectors_found < KEY_SIZE(insert)) {
			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
				       (KEY_SIZE(insert) - sectors_found));
			SET_KEY_SIZE(insert, sectors_found);
		}
	}
out:
	if (KEY_DIRTY(insert))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
					     KEY_START(insert),
					     KEY_SIZE(insert));

	return false;
}

bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_SIZE(k))
		return true;

	if (KEY_SIZE(k) > KEY_OFFSET(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_extent_invalid(b->c, k);
}

static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
				     unsigned int ptr)
{
	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
	char buf[80];

	if (mutex_trylock(&b->c->bucket_lock)) {
		if (b->c->gc_mark_valid &&
		    (!GC_MARK(g) ||
		     GC_MARK(g) == GC_MARK_METADATA ||
		     (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
			goto err;

		if (g->prio == BTREE_PRIO)
			goto err;

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
		  "inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i, stale;
	char buf[80];

	if (!KEY_PTRS(k) ||
	    bch_extent_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i))
			return true;

	for (i = 0; i < KEY_PTRS(k); i++) {
		stale = ptr_stale(b->c, k, i);

		if (stale && KEY_DIRTY(k)) {
			bch_extent_to_text(buf, sizeof(buf), k);
			pr_info("stale dirty pointer, stale %u, key: %s\n",
				stale, buf);
		}

		btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
			     "key too stale: %i, need_gc %u",
			     stale, b->c->need_gc);

		if (stale)
			return true;

		if (expensive_debug_checks(b->c) &&
		    bch_extent_bad_expensive(b, k, i))
			return true;
	}

	return false;
}

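/*
 * When KEY_CSUM() is set, the checksum lives in the u64 slot just past the
 * last pointer (k->ptr[KEY_PTRS(k)]). Merging two extents sums their
 * checksums, keeping the result within 63 bits.
 */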
static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
{
	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
		~((uint64_t)1 << 63);
}

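/*
 * Try to merge @r into @l; the caller has already checked that @l ends where
 * @r begins in the keyspace, so here we only check that the pointers are
 * contiguous on the cache device. Returns true only on a complete merge;
 * note the partial-merge path still mutates both keys, growing @l to the
 * USHRT_MAX size limit and cutting the merged sectors off the front of @r
 * before returning false.
 */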
static bool bch_extent_merge(struct btree_keys *bk,
			     struct bkey *l,
			     struct bkey *r)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (key_merging_disabled(b->c))
		return false;

	for (i = 0; i < KEY_PTRS(l); i++)
		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
			return false;

	/*
	 * Keys with no pointers aren't restricted to one bucket and could
	 * overflow KEY_SIZE
	 */
	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
		SET_KEY_SIZE(l, USHRT_MAX);

		bch_cut_front(l, r);
		return false;
	}

	if (KEY_CSUM(l)) {
		if (KEY_CSUM(r))
			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
		else
			SET_KEY_CSUM(l, 0);
	}

	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));

	return true;
}

const struct btree_keys_ops bch_extent_keys_ops = {
	.sort_cmp	= bch_extent_sort_cmp,
	.sort_fixup	= bch_extent_sort_fixup,
	.insert_fixup	= bch_extent_insert_fixup,
	.key_invalid	= bch_extent_invalid,
	.key_bad	= bch_extent_bad,
	.key_merge	= bch_extent_merge,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
	.is_extents	= true,
};