^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (C) 2011 Red Hat, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This file is released under the GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include "dm-btree-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "dm-space-map.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "dm-transaction-manager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/device-mapper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #define DM_MSG_PREFIX "btree"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) /*----------------------------------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * Array manipulation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) *--------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) static void memcpy_disk(void *dest, const void *src, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) __dm_written_to_disk(src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) memcpy(dest, src, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) __dm_unbless_for_disk(src);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) unsigned index, void *elt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) __dm_written_to_disk(elt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) if (index < nr_elts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) memmove(base + (elt_size * (index + 1)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) base + (elt_size * index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) (nr_elts - index) * elt_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) memcpy_disk(base + (elt_size * index), elt, elt_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) /* makes the assumption that no two keys are the same. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) while (hi - lo > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) int mid = lo + ((hi - lo) / 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) uint64_t mid_key = le64_to_cpu(n->keys[mid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) if (mid_key == key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) return mid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) if (mid_key < key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) lo = mid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) hi = mid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) return want_hi ? hi : lo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) int lower_bound(struct btree_node *n, uint64_t key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) return bsearch(n, key, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) static int upper_bound(struct btree_node *n, uint64_t key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) return bsearch(n, key, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) struct dm_btree_value_type *vt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) for (i = 0; i < nr_entries; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) dm_tm_inc(tm, value64(n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) else if (vt->inc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) for (i = 0; i < nr_entries; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) vt->inc(vt->context, value_ptr(n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) uint64_t key, void *value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) __dm_written_to_disk(value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) uint32_t max_entries = le32_to_cpu(node->header.max_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) __le64 key_le = cpu_to_le64(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) if (index > nr_entries ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) index >= max_entries ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) nr_entries >= max_entries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) DMERR("too many entries in btree node for insert");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) __dm_unbless_for_disk(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) __dm_bless_for_disk(&key_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) array_insert(value_base(node), value_size, nr_entries, index, value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) node->header.nr_entries = cpu_to_le32(nr_entries + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * We want 3n entries (for some n). This works more nicely for repeated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * insert remove loops than (2n + 1).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) static uint32_t calc_max_entries(size_t value_size, size_t block_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) uint32_t total, n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) block_size -= sizeof(struct node_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) total = block_size / elt_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) n = total / 3; /* rounds down */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) return 3 * n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) struct dm_block *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) size_t block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) uint32_t max_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) r = new_block(info, &b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) max_entries = calc_max_entries(info->value_type.size, block_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) n = dm_block_data(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) memset(n, 0, block_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) n->header.flags = cpu_to_le32(LEAF_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) n->header.nr_entries = cpu_to_le32(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) n->header.max_entries = cpu_to_le32(max_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) n->header.value_size = cpu_to_le32(info->value_type.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) *root = dm_block_location(b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) unlock_block(info, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) EXPORT_SYMBOL_GPL(dm_btree_empty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * Deletion uses a recursive algorithm, since we have limited stack space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) * we explicitly manage our own stack on the heap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) #define MAX_SPINE_DEPTH 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) struct frame {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) struct dm_block *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) unsigned level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) unsigned nr_children;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) unsigned current_child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) struct del_stack {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) struct dm_btree_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) struct dm_transaction_manager *tm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) int top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) struct frame spine[MAX_SPINE_DEPTH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) static int top_frame(struct del_stack *s, struct frame **f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) if (s->top < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) DMERR("btree deletion stack empty");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) *f = s->spine + s->top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) static int unprocessed_frames(struct del_stack *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) return s->top >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) static void prefetch_children(struct del_stack *s, struct frame *f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) for (i = 0; i < f->nr_children; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) dm_bm_prefetch(bm, value64(f->n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) return f->level < (info->levels - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) uint32_t ref_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) if (s->top >= MAX_SPINE_DEPTH - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) DMERR("btree deletion stack out of memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) r = dm_tm_ref(s->tm, b, &ref_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if (ref_count > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * This is a shared node, so we can just decrement it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * reference counter and leave the children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) dm_tm_dec(s->tm, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) uint32_t flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) struct frame *f = s->spine + ++s->top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) s->top--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) f->n = dm_block_data(f->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) f->level = level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) f->nr_children = le32_to_cpu(f->n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) f->current_child = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) flags = le32_to_cpu(f->n->header.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) prefetch_children(s, f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) static void pop_frame(struct del_stack *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) struct frame *f = s->spine + s->top--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) dm_tm_dec(s->tm, dm_block_location(f->b));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) dm_tm_unlock(s->tm, f->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) static void unlock_all_frames(struct del_stack *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) struct frame *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) while (unprocessed_frames(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) f = s->spine + s->top--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) dm_tm_unlock(s->tm, f->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) struct del_stack *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) * dm_btree_del() is called via an ioctl, as such should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) * considered an FS op. We can't recurse back into the FS, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * allocate GFP_NOFS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) s = kmalloc(sizeof(*s), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) s->info = info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) s->tm = info->tm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) s->top = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) r = push_frame(s, root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) while (unprocessed_frames(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) uint32_t flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) struct frame *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) dm_block_t b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) r = top_frame(s, &f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) if (f->current_child >= f->nr_children) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) pop_frame(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) flags = le32_to_cpu(f->n->header.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) if (flags & INTERNAL_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) b = value64(f->n, f->current_child);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) f->current_child++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) r = push_frame(s, b, f->level);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) } else if (is_internal_level(info, f)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) b = value64(f->n, f->current_child);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) f->current_child++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) r = push_frame(s, b, f->level + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) if (info->value_type.dec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) for (i = 0; i < f->nr_children; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) info->value_type.dec(info->value_type.context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) value_ptr(f->n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) pop_frame(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) /* cleanup all frames of del_stack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) unlock_all_frames(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) EXPORT_SYMBOL_GPL(dm_btree_del);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) int (*search_fn)(struct btree_node *, uint64_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) uint64_t *result_key, void *v, size_t value_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) int i, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) uint32_t flags, nr_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) r = ro_step(s, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) i = search_fn(ro_node(s), key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) flags = le32_to_cpu(ro_node(s)->header.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) if (i < 0 || i >= nr_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) if (flags & INTERNAL_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) block = value64(ro_node(s), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) } while (!(flags & LEAF_NODE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) *result_key = le64_to_cpu(ro_node(s)->keys[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) if (v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) memcpy(v, value_ptr(ro_node(s), i), value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) uint64_t *keys, void *value_le)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) unsigned level, last_level = info->levels - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) int r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) uint64_t rkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) __le64 internal_value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) struct ro_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) init_ro_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) for (level = 0; level < info->levels; level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) void *value_p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) if (level == last_level) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) value_p = value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) size = info->value_type.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) value_p = &internal_value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) size = sizeof(uint64_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) r = btree_lookup_raw(&spine, root, keys[level],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) lower_bound, &rkey,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) value_p, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (!r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) if (rkey != keys[level]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) exit_ro_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) exit_ro_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) root = le64_to_cpu(internal_value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) exit_ro_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) EXPORT_SYMBOL_GPL(dm_btree_lookup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) uint64_t key, uint64_t *rkey, void *value_le)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) int r, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) uint32_t flags, nr_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) struct dm_block *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) r = bn_read_lock(info, root, &node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) n = dm_block_data(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) flags = le32_to_cpu(n->header.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) nr_entries = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) if (flags & INTERNAL_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) i = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) if (i < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * avoid early -ENODATA return when all entries are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * higher than the search @key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) if (i >= nr_entries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) if (r == -ENODATA && i < (nr_entries - 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) i = upper_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (i < 0 || i >= nr_entries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) *rkey = le64_to_cpu(n->keys[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) memcpy(value_le, value_ptr(n, i), info->value_type.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) dm_tm_unlock(info->tm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) uint64_t *keys, uint64_t *rkey, void *value_le)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) unsigned level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) int r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) __le64 internal_value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) struct ro_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) init_ro_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) for (level = 0; level < info->levels - 1u; level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) r = btree_lookup_raw(&spine, root, keys[level],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) lower_bound, rkey,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) &internal_value_le, sizeof(uint64_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) if (*rkey != keys[level]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) root = le64_to_cpu(internal_value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) exit_ro_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) * Splits a node by creating a sibling node and shifting half the nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * contents across. Assumes there is a parent node, and it has room for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * another child.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * Before:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * +--------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * | Parent |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * +--------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) * |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) * v
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) * +----------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * | A ++++++ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * +----------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * After:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) * +--------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) * | Parent |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) * +--------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) * v +------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * +---------+ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * | A* +++ | v
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * +---------+ +-------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * | B +++ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * +-------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) * Where A* is a shadow of A.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) uint64_t key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) unsigned nr_left, nr_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) struct dm_block *left, *right, *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) struct btree_node *ln, *rn, *pn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) __le64 location;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) left = shadow_current(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) r = new_block(s->info, &right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) ln = dm_block_data(left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) rn = dm_block_data(right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) ln->header.nr_entries = cpu_to_le32(nr_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) rn->header.flags = ln->header.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) rn->header.nr_entries = cpu_to_le32(nr_right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) rn->header.max_entries = ln->header.max_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) rn->header.value_size = ln->header.value_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) sizeof(uint64_t) : s->info->value_type.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) size * nr_right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) * Patch up the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) parent = shadow_parent(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) pn = dm_block_data(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) location = cpu_to_le64(dm_block_location(left));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) __dm_bless_for_disk(&location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) memcpy_disk(value_ptr(pn, parent_index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) &location, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) location = cpu_to_le64(dm_block_location(right));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) __dm_bless_for_disk(&location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) r = insert_at(sizeof(__le64), pn, parent_index + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) le64_to_cpu(rn->keys[0]), &location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) unlock_block(s->info, right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) if (key < le64_to_cpu(rn->keys[0])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) unlock_block(s->info, right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) s->nodes[1] = left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) unlock_block(s->info, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) s->nodes[1] = right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) * Splits a node by creating two new children beneath the given node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) * Before:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) * +----------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) * | A ++++++ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) * +----------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) * After:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) * +------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) * | A (shadow) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) * +------------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) * +------+ +----+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) * v v
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) * +-------+ +-------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) * | B +++ | | C +++ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) * +-------+ +-------+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) unsigned nr_left, nr_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) struct dm_block *left, *right, *new_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) struct btree_node *pn, *ln, *rn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) __le64 val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) new_parent = shadow_current(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) pn = dm_block_data(new_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) sizeof(__le64) : s->info->value_type.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) /* create & init the left block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) r = new_block(s->info, &left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) ln = dm_block_data(left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) ln->header.flags = pn->header.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) ln->header.nr_entries = cpu_to_le32(nr_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) ln->header.max_entries = pn->header.max_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) ln->header.value_size = pn->header.value_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) /* create & init the right block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) r = new_block(s->info, &right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) unlock_block(s->info, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) rn = dm_block_data(right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) rn->header.flags = pn->header.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) rn->header.nr_entries = cpu_to_le32(nr_right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) rn->header.max_entries = pn->header.max_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) rn->header.value_size = pn->header.value_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) nr_right * size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) /* new_parent should just point to l and r now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) pn->header.flags = cpu_to_le32(INTERNAL_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) pn->header.nr_entries = cpu_to_le32(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) pn->header.max_entries = cpu_to_le32(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) calc_max_entries(sizeof(__le64),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) dm_bm_block_size(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) dm_tm_get_bm(s->info->tm))));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) pn->header.value_size = cpu_to_le32(sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) val = cpu_to_le64(dm_block_location(left));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) __dm_bless_for_disk(&val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) pn->keys[0] = ln->keys[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) val = cpu_to_le64(dm_block_location(right));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) __dm_bless_for_disk(&val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) pn->keys[1] = rn->keys[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) unlock_block(s->info, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) unlock_block(s->info, right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) struct dm_btree_value_type *vt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) uint64_t key, unsigned *index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) int r, i = *index, top = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) struct btree_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) r = shadow_step(s, root, vt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) node = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) * We have to patch up the parent node, ugly, but I don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) * see a way to do this automatically as part of the spine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) * op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) __dm_bless_for_disk(&location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) &location, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) node = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) if (node->header.nr_entries == node->header.max_entries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) if (top)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) r = btree_split_beneath(s, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) r = btree_split_sibling(s, i, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) node = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) i = lower_bound(node, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) if (le32_to_cpu(node->header.flags) & LEAF_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) if (i < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) /* change the bounds on the lowest key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) node->keys[0] = cpu_to_le64(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) root = value64(node, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) top = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) if (i < 0 || le64_to_cpu(node->keys[i]) != key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) *index = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) static bool need_insert(struct btree_node *node, uint64_t *keys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) unsigned level, unsigned index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) return ((index >= le32_to_cpu(node->header.nr_entries)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) (le64_to_cpu(node->keys[index]) != keys[level]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) static int insert(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) uint64_t *keys, void *value, dm_block_t *new_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) int *inserted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) __dm_written_to_disk(value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) unsigned level, index = -1, last_level = info->levels - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) dm_block_t block = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) struct shadow_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) struct dm_btree_value_type le64_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) init_le64_type(info->tm, &le64_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) init_shadow_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) for (level = 0; level < (info->levels - 1); level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) n = dm_block_data(shadow_current(&spine));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) if (need_insert(n, keys, level, index)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) dm_block_t new_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) __le64 new_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) r = dm_btree_empty(info, &new_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) new_le = cpu_to_le64(new_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) __dm_bless_for_disk(&new_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) r = insert_at(sizeof(uint64_t), n, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) keys[level], &new_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) if (level < last_level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) block = value64(n, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) r = btree_insert_raw(&spine, block, &info->value_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) keys[level], &index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) n = dm_block_data(shadow_current(&spine));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) if (need_insert(n, keys, level, index)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (inserted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) *inserted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) r = insert_at(info->value_type.size, n, index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) keys[level], value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) goto bad_unblessed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) if (inserted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) *inserted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (info->value_type.dec &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) (!info->value_type.equal ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) !info->value_type.equal(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) info->value_type.context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) value_ptr(n, index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) value))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) info->value_type.dec(info->value_type.context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) value_ptr(n, index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) memcpy_disk(value_ptr(n, index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) value, info->value_type.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) *new_root = shadow_root(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) exit_shadow_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) __dm_unbless_for_disk(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) bad_unblessed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) exit_shadow_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) uint64_t *keys, void *value, dm_block_t *new_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) __dm_written_to_disk(value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) return insert(info, root, keys, value, new_root, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) EXPORT_SYMBOL_GPL(dm_btree_insert);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) uint64_t *keys, void *value, dm_block_t *new_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) int *inserted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) __dm_written_to_disk(value)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) return insert(info, root, keys, value, new_root, inserted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) uint64_t *result_key, dm_block_t *next_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) int i, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) uint32_t flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) r = ro_step(s, block);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) flags = le32_to_cpu(ro_node(s)->header.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) i = le32_to_cpu(ro_node(s)->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) if (!i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) i--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if (find_highest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) *result_key = le64_to_cpu(ro_node(s)->keys[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) *result_key = le64_to_cpu(ro_node(s)->keys[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (next_block || flags & INTERNAL_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) if (find_highest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) block = value64(ro_node(s), i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) block = value64(ro_node(s), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) } while (flags & INTERNAL_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if (next_block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) *next_block = block;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) bool find_highest, uint64_t *result_keys)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) int r = 0, count = 0, level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct ro_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) init_ro_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) for (level = 0; level < info->levels; level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) r = find_key(&spine, root, find_highest, result_keys + level,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) level == info->levels - 1 ? NULL : &root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (r == -ENODATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) } else if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) exit_ro_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) return r ? r : count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) uint64_t *result_keys)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) return dm_btree_find_key(info, root, true, result_keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) uint64_t *result_keys)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) return dm_btree_find_key(info, root, false, result_keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * FIXME: We shouldn't use a recursive algorithm when we have limited stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * space. Also this only works for single level trees.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) static int walk_node(struct dm_btree_info *info, dm_block_t block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) int (*fn)(void *context, uint64_t *keys, void *leaf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) void *context)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) unsigned i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) struct dm_block *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) uint64_t keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) r = bn_read_lock(info, block, &node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) n = dm_block_data(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) nr = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) r = walk_node(info, value64(n, i), fn, context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) keys = le64_to_cpu(*key_ptr(n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) r = fn(context, &keys, value_ptr(n, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) dm_tm_unlock(info->tm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) int (*fn)(void *context, uint64_t *keys, void *leaf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) void *context)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) BUG_ON(info->levels > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) return walk_node(info, root, fn, context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) EXPORT_SYMBOL_GPL(dm_btree_walk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) static void prefetch_values(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) unsigned i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) __le64 value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) struct cursor_node *n = c->nodes + c->depth - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) struct btree_node *bn = dm_block_data(n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct dm_block_manager *bm = dm_tm_get_bm(c->info->tm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) BUG_ON(c->info->value_type.size != sizeof(value_le));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) nr = le32_to_cpu(bn->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) memcpy(&value_le, value_ptr(bn, i), sizeof(value_le));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) dm_bm_prefetch(bm, le64_to_cpu(value_le));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) static bool leaf_node(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) struct cursor_node *n = c->nodes + c->depth - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) struct btree_node *bn = dm_block_data(n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) return le32_to_cpu(bn->header.flags) & LEAF_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) static int push_node(struct dm_btree_cursor *c, dm_block_t b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) struct cursor_node *n = c->nodes + c->depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) if (c->depth >= DM_BTREE_CURSOR_MAX_DEPTH - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) DMERR("couldn't push cursor node, stack depth too high");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) r = bn_read_lock(c->info, b, &n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) n->index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) c->depth++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) if (c->prefetch_leaves || !leaf_node(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) prefetch_values(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) static void pop_node(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) c->depth--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) unlock_block(c->info, c->nodes[c->depth].b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) static int inc_or_backtrack(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) struct cursor_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct btree_node *bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (!c->depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) n = c->nodes + c->depth - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) bn = dm_block_data(n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) n->index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (n->index < le32_to_cpu(bn->header.nr_entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) pop_node(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) static int find_leaf(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) struct cursor_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) struct btree_node *bn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) __le64 value_le;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) n = c->nodes + c->depth - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) bn = dm_block_data(n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (le32_to_cpu(bn->header.flags) & LEAF_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) memcpy(&value_le, value_ptr(bn, n->index), sizeof(value_le));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) r = push_node(c, le64_to_cpu(value_le));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) DMERR("push_node failed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (!r && (le32_to_cpu(bn->header.nr_entries) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) bool prefetch_leaves, struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) c->info = info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) c->root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) c->depth = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) c->prefetch_leaves = prefetch_leaves;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) r = push_node(c, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) return find_leaf(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) EXPORT_SYMBOL_GPL(dm_btree_cursor_begin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) void dm_btree_cursor_end(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) while (c->depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) pop_node(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) EXPORT_SYMBOL_GPL(dm_btree_cursor_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) int dm_btree_cursor_next(struct dm_btree_cursor *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) int r = inc_or_backtrack(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) if (!r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) r = find_leaf(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) DMERR("find_leaf failed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) EXPORT_SYMBOL_GPL(dm_btree_cursor_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) while (count-- && !r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) r = dm_btree_cursor_next(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) EXPORT_SYMBOL_GPL(dm_btree_cursor_skip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) if (c->depth) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct cursor_node *n = c->nodes + c->depth - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) struct btree_node *bn = dm_block_data(n->b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) if (le32_to_cpu(bn->header.flags) & INTERNAL_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) *key = le64_to_cpu(*key_ptr(bn, n->index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) memcpy(value_le, value_ptr(bn, n->index), c->info->value_type.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) EXPORT_SYMBOL_GPL(dm_btree_cursor_get_value);