^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (C) 2011 Red Hat, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This file is released under the GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #ifndef _LINUX_DM_BTREE_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #define _LINUX_DM_BTREE_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "dm-block-manager.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) struct dm_transaction_manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * Annotations used to check on-disk metadata is handled as little-endian.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #ifdef __CHECKER__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # define __dm_written_to_disk(x) __releases(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) # define __dm_reads_from_disk(x) __acquires(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) # define __dm_bless_for_disk(x) __acquire(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) # define __dm_unbless_for_disk(x) __release(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) # define __dm_written_to_disk(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) # define __dm_reads_from_disk(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) # define __dm_bless_for_disk(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) # define __dm_unbless_for_disk(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * Information about the values stored within the btree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) struct dm_btree_value_type {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) void *context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * The size in bytes of each value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) uint32_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * Any of these methods can be safely set to NULL if you do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * need the corresponding feature.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * The btree is making a duplicate of the value, for instance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * because previously-shared btree nodes have now diverged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * @value argument is the new copy that the copy function may modify.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * (Probably it just wants to increment a reference count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * somewhere.) This method is _not_ called for insertion of a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * value: It is assumed the ref count is already 1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) void (*inc)(void *context, const void *value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * This value is being deleted. The btree takes care of freeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * the memory pointed to by @value. Often the del function just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * needs to decrement a reference count somewhere.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) void (*dec)(void *context, const void *value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * A test for equality between two values. When a value is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * overwritten with a new one, the old one has the dec method
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * called _unless_ the new and old value are deemed equal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) int (*equal)(void *context, const void *value1, const void *value2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * The shape and contents of a btree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) struct dm_btree_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) struct dm_transaction_manager *tm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * Number of nested btrees. (Not the depth of a single tree.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) unsigned levels;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) struct dm_btree_value_type value_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) * Set up an empty tree. O(1).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * Delete a tree. O(n) - this is the slow one! It can also block, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * please don't call it on an IO path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) int dm_btree_del(struct dm_btree_info *info, dm_block_t root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * All the lookup functions return -ENODATA if the key cannot be found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * Tries to find a key that matches exactly. O(ln(n))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) uint64_t *keys, void *value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * Tries to find the first key where the bottom level key is >= to that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * given. Useful for skipping empty sections of the btree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) uint64_t *keys, uint64_t *rkey, void *value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * Insertion (or overwrite an existing value). O(ln(n))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) uint64_t *keys, void *value, dm_block_t *new_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) __dm_written_to_disk(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * A variant of insert that indicates whether it actually inserted or just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * overwrote. Useful if you're keeping track of the number of entries in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) * tree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) uint64_t *keys, void *value, dm_block_t *new_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) int *inserted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) __dm_written_to_disk(value);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) * Remove a key if present. This doesn't remove empty sub trees. Normally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) * subtrees represent a separate entity, like a snapshot map, so this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) * correct behaviour. O(ln(n)).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) uint64_t *keys, dm_block_t *new_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * Removes a _contiguous_ run of values starting from 'keys' and not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * reaching keys2 (where keys2 is keys with the final key replaced with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) * altered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) uint64_t *keys, uint64_t end_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) dm_block_t *new_root, unsigned *nr_removed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) * Returns < 0 on failure. Otherwise the number of key entries that have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * been filled out. Remember trees can have zero entries, and as such have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * no lowest key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) uint64_t *result_keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * Returns < 0 on failure. Otherwise the number of key entries that have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * been filled out. Remember trees can have zero entries, and as such have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * no highest key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) uint64_t *result_keys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * Iterate through the a btree, calling fn() on each entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * It only works for single level trees and is internally recursive, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) * monitor stack usage carefully.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) int (*fn)(void *context, uint64_t *keys, void *leaf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) void *context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) /*----------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) * Cursor API. This does not follow the rolling lock convention. Since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) * know the order that values are required we can issue prefetches to speed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * up iteration. Use on a single level btree only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) #define DM_BTREE_CURSOR_MAX_DEPTH 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) struct cursor_node {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) struct dm_block *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) unsigned index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) struct dm_btree_cursor {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) struct dm_btree_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) dm_block_t root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) bool prefetch_leaves;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) unsigned depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * Creates a fresh cursor. If prefetch_leaves is set then it is assumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) * the btree contains block indexes that will be prefetched. The cursor is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * quite large, so you probably don't want to put it on the stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) bool prefetch_leaves, struct dm_btree_cursor *c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) void dm_btree_cursor_end(struct dm_btree_cursor *c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) int dm_btree_cursor_next(struct dm_btree_cursor *c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) #endif /* _LINUX_DM_BTREE_H */