^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2007 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include "ctree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include "disk-io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include "print-tree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "transaction.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "locking.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * Defrag all the leaves in a given btree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Read all the leaves and try to get key order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * better reflect disk order
 *
 * One call processes a single level 1 node. The search starts at
 * root->defrag_progress (or at the all-zero key when
 * defrag_progress.objectid is 0), the level 1 node found there is handed to
 * btrfs_realloc_node(), and the key that follows it is saved back into
 * root->defrag_progress.
 *
 * @trans: transaction handle, forwarded to btrfs_search_slot() and
 *         btrfs_realloc_node().
 * @root:  tree to defrag. Its defrag_progress and defrag_max keys are read
 *         and updated by this function.
 *
 * Return:
 *   0        nothing (more) to do; root->defrag_progress has been zeroed.
 *   -EAGAIN  a next key was stored in root->defrag_progress and at least one
 *            field of root->defrag_max is still greater than it (presumably
 *            the caller is expected to call again - confirm against callers).
 *   -ENOMEM  the path could not be allocated; this is the only exit that
 *            leaves root->defrag_progress untouched.
 *   other    non-zero values from btrfs_search_forward(), btrfs_search_slot()
 *            or btrfs_realloc_node() are passed through, and
 *            root->defrag_progress is zeroed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) struct btrfs_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) struct btrfs_path *path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) int wret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) int level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) int next_key_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) u64 last_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) if (root->fs_info->extent_root == root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * there's recursion here right now in the tree locking,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * we can't defrag the extent root without deadlock
 *
 * path is still NULL here, so the "out" label ends up calling
 * btrfs_free_path(NULL) and returning 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
/* Roots without the BTRFS_ROOT_SHAREABLE state bit are skipped (returns 0). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) path = btrfs_alloc_path();
/* Direct return: unlike the "out" path, defrag_progress is not reset. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) level = btrfs_header_level(root->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
/* A root that is itself a leaf has no level 1 node to work on. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) if (level == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
/*
 * defrag_progress.objectid == 0 means no pass is in progress: remember the
 * last key of the root node in defrag_max (used as the upper bound at the
 * "out" label) and start from the all-zero key. Otherwise resume from the
 * key saved by a previous call.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if (root->defrag_progress.objectid == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) struct extent_buffer *root_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) u32 nritems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) root_node = btrfs_lock_root_node(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) btrfs_set_lock_blocking_write(root_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) nritems = btrfs_header_nritems(root_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) root->defrag_max.objectid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /* from above we know this is not a leaf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) btrfs_node_key_to_cpu(root_node, &root->defrag_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) nritems - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) btrfs_tree_unlock(root_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) free_extent_buffer(root_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) memset(&key, 0, sizeof(key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) memcpy(&key, &root->defrag_progress, sizeof(key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) path->keep_locks = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
/*
 * First search only positions "key". A negative result is an error; a
 * positive result is treated as "nothing left to defrag" and mapped to 0
 * (presumably it means no matching key was found - confirm against
 * btrfs_search_forward()).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * leaves from path->nodes[1], so set lowest_level to 1 to avoid later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * a deadlock (attempting to write lock an already write locked leaf).
 *
 * NOTE(review): the trailing "0, 1" arguments are presumably ins_len = 0
 * and cow = 1 - confirm against the btrfs_search_slot() prototype.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) path->lowest_level = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) if (wret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ret = wret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) }
/* No level 1 node in the path: nothing to reallocate, finish with 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) if (!path->nodes[1]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * The node at level 1 must always be locked when our path has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * keep_locks set and lowest_level is 1, regardless of the value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * path->slots[1].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) BUG_ON(path->locks[1] == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) ret = btrfs_realloc_node(trans, root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) path->nodes[1], 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) &last_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) &root->defrag_progress);
/*
 * Any non-zero result ends this call. -EAGAIN is what this function itself
 * uses to report "more work pending", so getting it from
 * btrfs_realloc_node() is unexpected and warned about; note that it still
 * flows into the -EAGAIN handling at "out".
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) WARN_ON(ret == -EAGAIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * Now that we reallocated the node we can find the next key. Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * btrfs_find_next_key() can release our path and do another search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * without COWing, this is because even with path->keep_locks = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) * btrfs_search_slot() / ctree.c:unlock_up() does not keep a lock on a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * node when path->slots[node_level - 1] does not point to the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * we search for the next key after reallocating our node.
 *
 * slots[1] is set one past the last item of the level 1 node before the
 * lookup (presumably so the next key found lies beyond this node -
 * confirm against btrfs_find_next_key()). A return of 0 stores that key
 * as the resume point and requests another call via -EAGAIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) path->slots[1] = btrfs_header_nritems(path->nodes[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) next_key_ret = btrfs_find_next_key(root, path, &key, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) BTRFS_OLDEST_GENERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) if (next_key_ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) memcpy(&root->defrag_progress, &key, sizeof(key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) btrfs_free_path(path);
/*
 * Keep -EAGAIN only while defrag_max is still ahead of the saved progress
 * key in at least one field; otherwise the pass is complete and 0 is
 * returned.
 *
 * NOTE(review): objectid, type and offset are each compared independently,
 * which is not a lexicographic key comparison - confirm this is intended.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (ret == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (root->defrag_max.objectid > root->defrag_progress.objectid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) if (root->defrag_max.type > root->defrag_progress.type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) if (root->defrag_max.offset > root->defrag_progress.offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) done:
/*
 * Completion and errors alike clear the resume key, so the next call
 * takes the defrag_progress.objectid == 0 branch and starts a fresh pass.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) memset(&root->defrag_progress, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) sizeof(root->defrag_progress));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) }