^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (C) 2011 Red Hat, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This file is released under the GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include "dm-btree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "dm-btree-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "dm-transaction-manager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * Removing an entry from a btree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * ==============================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * A very important constraint for our btree is that no node, except the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * root, may have fewer than a certain number of entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * Ensuring this is complicated by the way we want to only ever hold the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * locks on 2 nodes concurrently, and only change nodes in a top to bottom
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * fashion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) *
 * Each node may have a left or right sibling. When descending the spine,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * if a node contains only MIN_ENTRIES then we try and increase this to at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * least MIN_ENTRIES + 1. We do this in the following ways:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * [A] No siblings => this can only happen if the node is the root, in which
 *     case we copy the child's contents over the root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * [B] No left sibling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * ==> rebalance(node, right sibling)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * [C] No right sibling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * ==> rebalance(left sibling, node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
 *     ==> delete node adding its contents to left and right
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * ==> rebalance(left, node, right)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) *
 * After these operations it's possible that our original node no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * longer contains the desired sub tree. For this reason this rebalancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * is performed on the children of the current node. This also avoids
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * having a special case for the root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * Once this rebalancing has occurred we can then step into the child node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * for internal nodes. Or delete the entry for leaf nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * Some little utilities for moving node data around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) static void node_shift(struct btree_node *n, int shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) uint32_t value_size = le32_to_cpu(n->header.value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) if (shift < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) shift = -shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) BUG_ON(shift > nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) memmove(key_ptr(n, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) key_ptr(n, shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) (nr_entries - shift) * sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) memmove(value_ptr(n, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) value_ptr(n, shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) (nr_entries - shift) * value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) memmove(key_ptr(n, shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) key_ptr(n, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) nr_entries * sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) memmove(value_ptr(n, shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) value_ptr(n, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) nr_entries * value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) uint32_t value_size = le32_to_cpu(left->header.value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) BUG_ON(value_size != le32_to_cpu(right->header.value_size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) if (shift < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) shift = -shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) memcpy(key_ptr(left, nr_left),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) key_ptr(right, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) shift * sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) memcpy(value_ptr(left, nr_left),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) value_ptr(right, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) shift * value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) BUG_ON(shift > le32_to_cpu(right->header.max_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) memcpy(key_ptr(right, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) key_ptr(left, nr_left - shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) shift * sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) memcpy(value_ptr(right, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) value_ptr(left, nr_left - shift),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) shift * value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * Delete a specific entry from a leaf node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) static void delete_at(struct btree_node *n, unsigned index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) unsigned nr_to_copy = nr_entries - (index + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) uint32_t value_size = le32_to_cpu(n->header.value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) BUG_ON(index >= nr_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) if (nr_to_copy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) memmove(key_ptr(n, index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) key_ptr(n, index + 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) nr_to_copy * sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) memmove(value_ptr(n, index),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) value_ptr(n, index + 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) nr_to_copy * value_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) n->header.nr_entries = cpu_to_le32(nr_entries - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
/*
 * The entry count at or below which a node is considered for merging
 * with a sibling: one third of the node's capacity.
 */
static unsigned merge_threshold(struct btree_node *n)
{
	return le32_to_cpu(n->header.max_entries) / 3;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
/*
 * A shadowed child node, as set up by init_child() and released by
 * exit_child().
 */
struct child {
	unsigned index;		/* slot in the parent that refers to this child */
	struct dm_block *block;	/* the shadowed block holding the child */
	struct btree_node *n;	/* the child node's data, within 'block' */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
/*
 * Shadows the child at @index of @parent and fills out @result so the
 * caller can safely modify it.  The parent's value slot is rewritten to
 * point at the shadow block (which may be at a new location).  The block
 * stays locked until exit_child() is called.
 *
 * Returns 0 on success, or the error from dm_tm_shadow_block().
 */
static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
		      struct btree_node *parent,
		      unsigned index, struct child *result)
{
	int r, inc;
	dm_block_t root;

	result->index = index;
	root = value64(parent, index);

	r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
			       &result->block, &inc);
	if (r)
		return r;

	result->n = dm_block_data(result->block);

	/*
	 * Shadowing made a fresh copy, so everything this node references
	 * gained a referrer; bump the children's reference counts.
	 */
	if (inc)
		inc_children(info->tm, result->n, vt);

	/* Repoint the parent at the shadow's location. */
	*((__le64 *) value_ptr(parent, index)) =
		cpu_to_le64(dm_block_location(result->block));

	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
/*
 * Releases the lock on the child's block taken by init_child().
 */
static void exit_child(struct dm_btree_info *info, struct child *c)
{
	dm_tm_unlock(info->tm, c->block);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
/*
 * Moves @count entries between two adjacent siblings and updates both
 * entry counts.  count > 0 moves the last @count entries of @left to the
 * front of @right; count < 0 moves the first -@count entries of @right
 * to the end of @left.
 */
static void shift(struct btree_node *left, struct btree_node *right, int count)
{
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);

	BUG_ON(max_entries != r_max_entries);
	BUG_ON(nr_left - count > max_entries);
	BUG_ON(nr_right + count > max_entries);

	if (!count)
		return;

	/*
	 * Order matters: moving left -> right must open the gap in right
	 * before copying; moving right -> left must copy out of right
	 * before right's remaining entries are slid down.
	 */
	if (count > 0) {
		node_shift(right, count);
		node_copy(left, right, count);
	} else {
		node_copy(left, right, count);
		node_shift(right, count);
	}

	left->header.nr_entries = cpu_to_le32(nr_left - count);
	right->header.nr_entries = cpu_to_le32(nr_right + count);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
/*
 * Given two adjacent shadowed children, either merges them into one node
 * or redistributes their entries evenly, keeping the parent's separator
 * key in sync.
 */
static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
			 struct child *l, struct child *r)
{
	struct btree_node *left = l->n;
	struct btree_node *right = r->n;
	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
	/*
	 * Ensure the number of entries in each child will be greater
	 * than or equal to (max_entries / 3 + 1), so no matter which
	 * child is used for removal, the number will still be not
	 * less than (max_entries / 3).
	 */
	unsigned int threshold = 2 * (merge_threshold(left) + 1);

	if (nr_left + nr_right < threshold) {
		/*
		 * Merge: fold all of right's entries into left, then drop
		 * right's entry from the parent.
		 */
		node_copy(left, right, -nr_right);
		left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
		delete_at(parent, r->index);

		/*
		 * We need to decrement the right block, but not its
		 * children, since they're still referenced by left.
		 */
		dm_tm_dec(info->tm, dm_block_location(r->block));
	} else {
		/*
		 * Rebalance: split the entries roughly evenly and refresh
		 * the separator key for right in the parent.
		 */
		unsigned target_left = (nr_left + nr_right) / 2;
		shift(left, right, nr_left - target_left);
		*key_ptr(parent, r->index) = right->keys[0];
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
/*
 * Rebalances the two adjacent children of the current spine node starting
 * at @left_index.  Both children are shadowed first so all modifications
 * hit writeable copies.
 */
static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
		      struct dm_btree_value_type *vt, unsigned left_index)
{
	int r;
	struct btree_node *parent;
	struct child left, right;

	parent = dm_block_data(shadow_current(s));

	r = init_child(info, vt, parent, left_index, &left);
	if (r)
		return r;

	r = init_child(info, vt, parent, left_index + 1, &right);
	if (r) {
		/* Don't leave the left child locked on failure. */
		exit_child(info, &left);
		return r;
	}

	__rebalance2(info, parent, &left, &right);

	exit_child(info, &left);
	exit_child(info, &right);

	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
/*
 * We dump as many entries from center as possible into left, then the rest
 * in right, then rebalance2.  This wastes some cpu, but I want something
 * simple atm.
 */
static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
			       struct child *l, struct child *c, struct child *r,
			       struct btree_node *left, struct btree_node *center, struct btree_node *right,
			       uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	/* As much of center as fits in left. */
	unsigned shift = min(max_entries - nr_left, nr_center);

	BUG_ON(nr_left + shift > max_entries);
	node_copy(left, center, -shift);
	left->header.nr_entries = cpu_to_le32(nr_left + shift);

	if (shift != nr_center) {
		/* Whatever didn't fit in left goes into right. */
		shift = nr_center - shift;
		BUG_ON((nr_right + shift) > max_entries);
		node_shift(right, shift);
		node_copy(center, right, shift);
		right->header.nr_entries = cpu_to_le32(nr_right + shift);
	}
	/* Right's first key may have changed; refresh its separator. */
	*key_ptr(parent, r->index) = right->keys[0];

	/* Center is now empty; remove it from the parent. */
	delete_at(parent, c->index);
	r->index--;

	dm_tm_dec(info->tm, dm_block_location(c->block));
	__rebalance2(info, parent, l, r);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
/*
 * Redistributes entries among 3 sibling nodes.
 */
static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
			  struct child *l, struct child *c, struct child *r,
			  struct btree_node *left, struct btree_node *center, struct btree_node *right,
			  uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
	int s;
	uint32_t max_entries = le32_to_cpu(left->header.max_entries);
	unsigned total = nr_left + nr_center + nr_right;
	/* Aim for an even three-way split; when total isn't a multiple of
	 * three, left's target takes an extra entry. */
	unsigned target_right = total / 3;
	unsigned remainder = (target_right * 3) != total;
	unsigned target_left = target_right + remainder;

	BUG_ON(target_left > max_entries);
	BUG_ON(target_right > max_entries);

	if (nr_left < nr_right) {
		s = nr_left - target_left;

		if (s < 0 && nr_center < -s) {
			/* not enough in central node */
			shift(left, center, -nr_center);
			s += nr_center;
			shift(left, right, s);
			nr_right += s;
		} else
			shift(left, center, s);

		shift(center, right, target_right - nr_right);

	} else {
		s = target_right - nr_right;
		if (s > 0 && nr_center < s) {
			/* not enough in central node */
			shift(center, right, nr_center);
			s -= nr_center;
			shift(left, right, s);
			nr_left -= s;
		} else
			shift(center, right, s);

		shift(left, center, nr_left - target_left);
	}

	/* The separator keys in the parent must track the new first keys. */
	*key_ptr(parent, c->index) = center->keys[0];
	*key_ptr(parent, r->index) = right->keys[0];
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346)
/*
 * Rebalances three adjacent shadowed children: if their combined entry
 * count is small enough, the center node is deleted and its entries
 * merged into its siblings; otherwise the entries are redistributed
 * across all three.
 */
static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
			 struct child *l, struct child *c, struct child *r)
{
	struct btree_node *left = l->n;
	struct btree_node *center = c->n;
	struct btree_node *right = r->n;

	uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
	uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
	uint32_t nr_right = le32_to_cpu(right->header.nr_entries);

	unsigned threshold = merge_threshold(left) * 4 + 1;

	BUG_ON(left->header.max_entries != center->header.max_entries);
	BUG_ON(center->header.max_entries != right->header.max_entries);

	if ((nr_left + nr_center + nr_right) < threshold)
		delete_center_node(info, parent, l, c, r, left, center, right,
				   nr_left, nr_center, nr_right);
	else
		redistribute3(info, parent, l, c, r, left, center, right,
			      nr_left, nr_center, nr_right);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) struct dm_btree_value_type *vt, unsigned left_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) struct btree_node *parent = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) struct child left, center, right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) * FIXME: fill out an array?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) r = init_child(info, vt, parent, left_index, &left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) r = init_child(info, vt, parent, left_index + 1, ¢er);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) exit_child(info, &left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) r = init_child(info, vt, parent, left_index + 2, &right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) if (r) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) exit_child(info, &left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) exit_child(info, ¢er);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) __rebalance3(info, parent, &left, ¢er, &right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) exit_child(info, &left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) exit_child(info, ¢er);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) exit_child(info, &right);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406)
/*
 * Ensures the child of the current spine node that covers @key can lose
 * an entry without dropping below the minimum, rebalancing with its
 * siblings if needed.  This implements cases [A]-[E] described in the
 * commentary at the top of this file.
 */
static int rebalance_children(struct shadow_spine *s,
			      struct dm_btree_info *info,
			      struct dm_btree_value_type *vt, uint64_t key)
{
	int i, r, has_left_sibling, has_right_sibling;
	struct btree_node *n;

	n = dm_block_data(shadow_current(s));

	if (le32_to_cpu(n->header.nr_entries) == 1) {
		/*
		 * Case [A]: a single child and hence no siblings; collapse
		 * the level by copying the child's contents over this node.
		 */
		struct dm_block *child;
		dm_block_t b = value64(n, 0);

		r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
		if (r)
			return r;

		memcpy(n, dm_block_data(child),
		       dm_bm_block_size(dm_tm_get_bm(info->tm)));

		dm_tm_dec(info->tm, dm_block_location(child));
		dm_tm_unlock(info->tm, child);
		return 0;
	}

	i = lower_bound(n, key);
	if (i < 0)
		return -ENODATA;

	has_left_sibling = i > 0;
	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);

	/* Cases [B]/[C] use two children, [D]/[E] use three. */
	if (!has_left_sibling)
		r = rebalance2(s, info, vt, i);

	else if (!has_right_sibling)
		r = rebalance2(s, info, vt, i - 1);

	else
		r = rebalance3(s, info, vt, i - 1);

	return r;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) int i = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) if ((i < 0) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) (i >= le32_to_cpu(n->header.nr_entries)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) (le64_to_cpu(n->keys[i]) != key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) return -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) *index = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) * Prepares for removal from one level of the hierarchy. The caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) * call delete_at() to remove the entry at index.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) struct dm_btree_value_type *vt, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) uint64_t key, unsigned *index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) int i = *index, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) r = shadow_step(s, root, vt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) * We have to patch up the parent node, ugly, but I don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * see a way to do this automatically as part of the spine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) if (shadow_has_parent(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) &location, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) n = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (le32_to_cpu(n->header.flags) & LEAF_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) return do_leaf(n, key, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) r = rebalance_children(s, info, vt, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) n = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) if (le32_to_cpu(n->header.flags) & LEAF_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) return do_leaf(n, key, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) i = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * We know the key is present, or else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * rebalance_children would have returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * -ENODATA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) root = value64(n, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) uint64_t *keys, dm_block_t *new_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) unsigned level, last_level = info->levels - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) int index = 0, r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) struct shadow_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) struct dm_btree_value_type le64_vt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) init_le64_type(info->tm, &le64_vt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) init_shadow_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) for (level = 0; level < info->levels; level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) r = remove_raw(&spine, info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) (level == last_level ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) &info->value_type : &le64_vt),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) root, keys[level], (unsigned *)&index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) n = dm_block_data(shadow_current(&spine));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) if (level != last_level) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) root = value64(n, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) if (info->value_type.dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) info->value_type.dec(info->value_type.context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) value_ptr(n, index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) delete_at(n, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) if (!r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) *new_root = shadow_root(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) exit_shadow_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) EXPORT_SYMBOL_GPL(dm_btree_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) static int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) struct dm_btree_value_type *vt, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) uint64_t key, int *index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) int i = *index, r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) r = shadow_step(s, root, vt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) * We have to patch up the parent node, ugly, but I don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) * see a way to do this automatically as part of the spine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) if (shadow_has_parent(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) memcpy(value_ptr(dm_block_data(shadow_parent(s)), i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) &location, sizeof(__le64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) n = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) *index = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) r = rebalance_children(s, info, vt, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) n = dm_block_data(shadow_current(s));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) if (le32_to_cpu(n->header.flags) & LEAF_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) *index = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) i = lower_bound(n, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) * We know the key is present, or else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) * rebalance_children would have returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) * -ENODATA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) root = value64(n, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) static int remove_one(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) uint64_t *keys, uint64_t end_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) dm_block_t *new_root, unsigned *nr_removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) unsigned level, last_level = info->levels - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) int index = 0, r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) struct shadow_spine spine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) struct btree_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) struct dm_btree_value_type le64_vt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) uint64_t k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) init_le64_type(info->tm, &le64_vt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) init_shadow_spine(&spine, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) for (level = 0; level < last_level; level++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) r = remove_raw(&spine, info, &le64_vt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) root, keys[level], (unsigned *) &index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) n = dm_block_data(shadow_current(&spine));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) root = value64(n, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) r = remove_nearest(&spine, info, &info->value_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) root, keys[last_level], &index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) n = dm_block_data(shadow_current(&spine));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) if (index < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) if (index >= le32_to_cpu(n->header.nr_entries)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) k = le64_to_cpu(n->keys[index]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) if (k >= keys[last_level] && k < end_key) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (info->value_type.dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) info->value_type.dec(info->value_type.context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) value_ptr(n, index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) delete_at(n, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) keys[last_level] = k + 1ull;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) r = -ENODATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) *new_root = shadow_root(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) exit_shadow_spine(&spine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) uint64_t *first_key, uint64_t end_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) dm_block_t *new_root, unsigned *nr_removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) *nr_removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) r = remove_one(info, root, first_key, end_key, &root, nr_removed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) if (!r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) (*nr_removed)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) } while (!r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) *new_root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) return r == -ENODATA ? 0 : r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) EXPORT_SYMBOL_GPL(dm_btree_remove_leaves);