^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2011 STRATO. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/writeback.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/workqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "ctree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "volumes.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "disk-io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "transaction.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "dev-replace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "block-group.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #undef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * This is the implementation for the generic read ahead framework.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * To trigger a readahead, btrfs_reada_add must be called. It will start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * a read ahead for the given range [start, end) on tree root. The returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * handle can either be used to wait on the readahead to finish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * The read ahead works as follows:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * reada_start_machine will then search for extents to prefetch and trigger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * some reads. When a read finishes for a node, all contained node/leaf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * pointers that lie in the given range will also be enqueued. The reads will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * be triggered in sequential order, thus giving a big win over a naive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * enumeration. It will also make use of multi-device layouts. Each disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * will have its own read pointer and all disks will be utilized in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * Also, no two disks will read both sides of a mirror simultaneously, as this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * would waste seeking capacity. Instead both disks will read different parts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * of the filesystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * Any number of readaheads can be started in parallel. The read order will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * determined globally, i.e. 2 parallel readaheads will normally finish faster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * than the 2 started one after another.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #define MAX_IN_FLIGHT 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
/*
 * Link between one readahead request (reada_control) and one reada_extent.
 * Several concurrent readaheads may want the same extent; each gets its own
 * extctl on the extent's list. 'generation' is the transid the requester
 * expects the block to have — a mismatch cuts this branch from prefetch
 * (checked in __readahead_hook).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) struct reada_extctl {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) struct reada_control *rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
/*
 * One tree block scheduled for readahead, indexed in fs_info->reada_tree
 * (and in each device's reada_extents tree) by logical >> PAGE_SHIFT.
 * 'refcnt' is manipulated under fs_info->reada_lock; 'lock' protects the
 * extctl list and the 'scheduled' flag. 'top' is the key range limit
 * inherited from the parent node (used as next_key for the last pointer).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) struct reada_extent {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) struct btrfs_key top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) struct list_head extctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) int refcnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) struct reada_zone *zones[BTRFS_MAX_MIRRORS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) int nzones;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) int scheduled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
/*
 * Per-device readahead zone, covering one block group ([start, end], see
 * reada_find_zone). Indexed in dev->reada_zones by end >> PAGE_SHIFT.
 * 'elems' counts extents attached to this zone; the kref is dropped under
 * fs_info->reada_lock when it can be the last reference.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) struct reada_zone {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) u64 end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) u64 elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) int locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * self */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) int ndevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) struct kref refcnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
/*
 * Work item used to run the readahead state machine asynchronously on a
 * btrfs workqueue; carries the fs_info the worker operates on.
 * NOTE(review): the worker function itself is outside this chunk — confirm
 * it calls __reada_start_machine(fs_info).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) struct reada_machine_work {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) struct btrfs_work work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) struct btrfs_fs_info *fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) static void reada_control_release(struct kref *kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) static void reada_zone_release(struct kref *kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) static void reada_start_machine(struct btrfs_fs_info *fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) static void __reada_start_machine(struct btrfs_fs_info *fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) static int reada_add_block(struct reada_control *rc, u64 logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) struct btrfs_key *top, u64 generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) /* recurses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) /* in case of err, eb might be NULL */
/*
 * Process a finished (or failed) read of one tree block under readahead.
 *
 * Atomically takes over the extent's pending extctl list and clears the
 * 'scheduled' flag. For a successfully read internal node, every child
 * pointer that lies inside a requester's [key_start, key_end) range and
 * matches the expected generation is enqueued via reada_add_block. Leaves
 * (level 0) and read errors just fall through to cleanup, cutting the
 * branch from prefetch. Cleanup drops, per requester: the extctl, one
 * reada_control reference, and one reada_extent reference; the requester
 * whose 'elems' count hits zero gets its waiters woken.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) static void __readahead_hook(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) struct reada_extent *re, struct extent_buffer *eb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) int err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) int nritems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) u64 bytenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) spin_lock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * just take the full list from the extent. afterwards we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * don't need the lock anymore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) list_replace_init(&re->extctl, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) re->scheduled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) spin_unlock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * this is the error case, the extent buffer has not been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * read correctly. We won't access anything from it and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * just cleanup our data structures. Effectively this will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * cut the branch below this node from read ahead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * FIXME: currently we just set nritems to 0 if this is a leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * effectively ignoring the content. In a next step we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * trigger more readahead depending from the content, e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * fetch the checksums for the extents in the leaf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if (!btrfs_header_level(eb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) nritems = btrfs_header_nritems(eb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) generation = btrfs_header_generation(eb);
/* walk all child pointers of this internal node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) for (i = 0; i < nritems; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) struct reada_extctl *rec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) u64 n_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) struct btrfs_key next_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) btrfs_node_key_to_cpu(eb, &key, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) if (i + 1 < nritems)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) btrfs_node_key_to_cpu(eb, &next_key, i + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) next_key = re->top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) bytenr = btrfs_node_blockptr(eb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) n_gen = btrfs_node_ptr_generation(eb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) list_for_each_entry(rec, &list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) struct reada_control *rc = rec->rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * if the generation doesn't match, just ignore this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * extctl. This will probably cut off a branch from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * prefetch. Alternatively one could start a new (sub-)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) * prefetch for this branch, starting again from root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) * FIXME: move the generation check out of this loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) if (rec->generation != generation) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) btrfs_debug(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) "generation mismatch for (%llu,%d,%llu) %llu != %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) key.objectid, key.type, key.offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) rec->generation, generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) #endif
/* child overlaps [key_start, key_end) of this requester? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) if (rec->generation == generation &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) reada_add_block(rc, bytenr, &next_key, n_gen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * free extctl records
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) while (!list_empty(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) struct reada_control *rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) struct reada_extctl *rec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) rec = list_first_entry(&list, struct reada_extctl, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) list_del(&rec->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) rc = rec->rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) kfree(rec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
/*
 * take a temporary ref so rc stays valid between dropping the
 * elems count and waking up the waiters
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) kref_get(&rc->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) if (atomic_dec_and_test(&rc->elems)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) kref_put(&rc->refcnt, reada_control_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) wake_up(&rc->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) kref_put(&rc->refcnt, reada_control_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) reada_extent_put(fs_info, re); /* one ref for each entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
/*
 * Hook called when a btree block read completes, with @err != 0 on a
 * failed read.
 *
 * Looks up the reada_extent registered for eb->start; if none exists this
 * block was not under readahead and we return -1, otherwise the buffer is
 * handed to __readahead_hook and 0 is returned. In both cases the state
 * machine is kicked so further reads get scheduled.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) int btree_readahead_hook(struct extent_buffer *eb, int err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) struct btrfs_fs_info *fs_info = eb->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) struct reada_extent *re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) /* find extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) re = radix_tree_lookup(&fs_info->reada_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) eb->start >> PAGE_SHIFT);
/* take a ref while still under reada_lock so re can't be freed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) if (re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) re->refcnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) if (!re) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) ret = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) goto start_machine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) __readahead_hook(fs_info, re, eb, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) reada_extent_put(fs_info, re); /* our ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) start_machine:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) reada_start_machine(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223)
/*
 * Find or create the reada_zone of @dev that covers @logical.
 *
 * A zone spans the block group containing @logical and is indexed in
 * dev->reada_zones by its end page offset; its devs[] mirrors the stripe
 * devices of @bbio. Returns the zone with an extra kref held, or NULL on
 * lookup/allocation failure.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) struct btrfs_bio *bbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) struct btrfs_fs_info *fs_info = dev->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) struct reada_zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) struct btrfs_block_group *cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) u64 end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
/* fast path: a zone indexed at or after 'logical' may already cover it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) logical >> PAGE_SHIFT, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) if (ret == 1 && logical >= zone->start && logical <= zone->end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) kref_get(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) return zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
/* the zone boundaries are those of the containing block group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) cache = btrfs_lookup_block_group(fs_info, logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) if (!cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) start = cache->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) end = start + cache->length - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) zone = kzalloc(sizeof(*zone), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) if (!zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
/* preload outside the spinlock so the insert below can't sleep */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) ret = radix_tree_preload(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) kfree(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) zone->start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) zone->end = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) INIT_LIST_HEAD(&zone->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) spin_lock_init(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) zone->locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) kref_init(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) zone->elems = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) zone->device = dev; /* our device always sits at index 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) for (i = 0; i < bbio->num_stripes; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) /* bounds have already been checked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) zone->devs[i] = bbio->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) zone->ndevs = bbio->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) ret = radix_tree_insert(&dev->reada_zones,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) (unsigned long)(zone->end >> PAGE_SHIFT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
/* lost the race to insert: drop ours and take a ref on the winner */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) if (ret == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) kfree(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) logical >> PAGE_SHIFT, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) if (ret == 1 && logical >= zone->start && logical <= zone->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) kref_get(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) return zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
/*
 * Look up or create the reada_extent for the tree block at @logical.
 *
 * On the fast path an existing extent is returned with its refcnt bumped.
 * Otherwise the block is mapped for all read mirrors, a reada_zone is
 * attached per (present) stripe device, and the extent is inserted into
 * fs_info->reada_tree plus each usable device's reada_extents tree — all
 * or nothing. Devices that are missing, marked NO_READA, or the current
 * dev-replace target are skipped. Returns the extent with a reference
 * held; on failure, NULL or (if another thread raced the insertion) the
 * already existing extent.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) u64 logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) struct btrfs_key *top)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) struct reada_extent *re = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) struct reada_extent *re_exist = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) struct btrfs_device *prev_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) u64 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) int real_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) int nzones = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) unsigned long index = logical >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) int dev_replace_is_ongoing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) int have_zone = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
/* fast path: extent already registered, just take a ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) re = radix_tree_lookup(&fs_info->reada_tree, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) if (re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) re->refcnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) if (re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) return re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) re = kzalloc(sizeof(*re), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) if (!re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) re->logical = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) re->top = *top;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) INIT_LIST_HEAD(&re->extctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) spin_lock_init(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) re->refcnt = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * map block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) length = fs_info->nodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) &length, &bbio, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) if (ret || !bbio || length < fs_info->nodesize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) "readahead: more than %d copies not supported",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) BTRFS_MAX_MIRRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
/* attach a zone per source stripe; dev-replace target stripes excluded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) for (nzones = 0; nzones < real_stripes; ++nzones) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) struct reada_zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) dev = bbio->stripes[nzones].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) /* cannot read ahead on missing device. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) if (!dev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) zone = reada_find_zone(dev, logical, bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) if (!zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364)
/*
 * count this extent in the zone; the first extent takes an extra
 * zone ref that is dropped again when elems reaches zero
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) re->zones[re->nzones++] = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) if (!zone->elems)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) kref_get(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) ++zone->elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) kref_put(&zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) if (re->nzones == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) /* not a single zone found, error and out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) /* Insert extent in reada tree + all per-device trees, all or nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) down_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) ret = radix_tree_preload(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) ret = radix_tree_insert(&fs_info->reada_tree, index, re);
/* lost the race to register this extent: return the winner instead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) if (ret == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) re_exist->refcnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) prev_dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) &fs_info->dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) for (nzones = 0; nzones < re->nzones; ++nzones) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) dev = re->zones[nzones]->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) if (dev == prev_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) * in case of DUP, just add the first zone. As both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) * are on the same device, there's nothing to gain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) * from adding both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) * Also, it wouldn't work, as the tree is per device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) * and adding would fail with EEXIST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) if (!dev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (test_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) if (dev_replace_is_ongoing &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) dev == fs_info->dev_replace.tgtdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) * as this device is selected for reading only as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) * a last resort, skip it for read ahead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) prev_dev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) ret = radix_tree_insert(&dev->reada_extents, index, re);
/* per-device insert failed: roll back every tree we touched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) while (--nzones >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) dev = re->zones[nzones]->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) BUG_ON(dev == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) /* ignore whether the entry was inserted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) radix_tree_delete(&dev->reada_extents, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) radix_tree_delete(&fs_info->reada_tree, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) have_zone = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) }
/* no device accepted the extent (all missing/NO_READA/replace tgt) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) if (!have_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) radix_tree_delete(&fs_info->reada_tree, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) if (!have_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) return re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) error:
/* undo the zone elems/ref accounting taken in the setup loop above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) for (nzones = 0; nzones < re->nzones; ++nzones) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) struct reada_zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) zone = re->zones[nzones];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) kref_get(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) --zone->elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) if (zone->elems == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) * no fs_info->reada_lock needed, as this can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * the last ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) kref_put(&zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) kref_put(&zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) kfree(re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) return re_exist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) static void reada_extent_put(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) struct reada_extent *re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) unsigned long index = re->logical >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) if (--re->refcnt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) radix_tree_delete(&fs_info->reada_tree, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) for (i = 0; i < re->nzones; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) struct reada_zone *zone = re->zones[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) radix_tree_delete(&zone->device->reada_extents, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) for (i = 0; i < re->nzones; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) struct reada_zone *zone = re->zones[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) kref_get(&zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) spin_lock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) --zone->elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) if (zone->elems == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) /* no fs_info->reada_lock needed, as this can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * the last ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) kref_put(&zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) spin_unlock(&zone->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) kref_put(&zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) kfree(re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) static void reada_zone_release(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) radix_tree_delete(&zone->device->reada_zones,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) zone->end >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) kfree(zone);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) static void reada_control_release(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) struct reada_control *rc = container_of(kref, struct reada_control,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) kfree(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) static int reada_add_block(struct reada_control *rc, u64 logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) struct btrfs_key *top, u64 generation)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) struct btrfs_fs_info *fs_info = rc->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) struct reada_extent *re;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) struct reada_extctl *rec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) /* takes one ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) re = reada_find_extent(fs_info, logical, top);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) if (!re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) rec = kzalloc(sizeof(*rec), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) if (!rec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) reada_extent_put(fs_info, re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) rec->rc = rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) rec->generation = generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) atomic_inc(&rc->elems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) spin_lock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) list_add_tail(&rec->list, &re->extctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) spin_unlock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) /* leave the ref on the extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * called with fs_info->reada_lock held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) unsigned long index = zone->end >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) for (i = 0; i < zone->ndevs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) struct reada_zone *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) if (peer && peer->device != zone->device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) peer->locked = lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) * called with fs_info->reada_lock held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) static int reada_pick_zone(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) struct reada_zone *top_zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) struct reada_zone *top_locked_zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) u64 top_elems = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) u64 top_locked_elems = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) unsigned long index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) if (dev->reada_curr_zone) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) reada_peer_zones_set_lock(dev->reada_curr_zone, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) dev->reada_curr_zone = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) /* pick the zone with the most elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) struct reada_zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) ret = radix_tree_gang_lookup(&dev->reada_zones,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) (void **)&zone, index, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) index = (zone->end >> PAGE_SHIFT) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) if (zone->locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) if (zone->elems > top_locked_elems) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) top_locked_elems = zone->elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) top_locked_zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) if (zone->elems > top_elems) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) top_elems = zone->elems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) top_zone = zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) if (top_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) dev->reada_curr_zone = top_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) else if (top_locked_zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) dev->reada_curr_zone = top_locked_zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) dev->reada_next = dev->reada_curr_zone->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) kref_get(&dev->reada_curr_zone->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) reada_peer_zones_set_lock(dev->reada_curr_zone, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) int mirror_num, struct extent_buffer **eb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) struct extent_buffer *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) buf = btrfs_find_create_tree_block(fs_info, bytenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) ret = read_extent_buffer_pages(buf, WAIT_PAGE_LOCK, mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) free_extent_buffer_stale(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) free_extent_buffer_stale(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) } else if (extent_buffer_uptodate(buf)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) *eb = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) free_extent_buffer(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) static int reada_start_machine_dev(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) struct btrfs_fs_info *fs_info = dev->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) struct reada_extent *re = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) int mirror_num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) struct extent_buffer *eb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) if (dev->reada_curr_zone == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) ret = reada_pick_zone(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * FIXME currently we issue the reads one extent at a time. If we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * a contiguous block of extents, we could also coagulate them or use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) * plugging to speed things up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) dev->reada_next >> PAGE_SHIFT, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) ret = reada_pick_zone(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) re = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) dev->reada_next >> PAGE_SHIFT, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) dev->reada_next = re->logical + fs_info->nodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) re->refcnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) spin_lock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) if (re->scheduled || list_empty(&re->extctl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) spin_unlock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) reada_extent_put(fs_info, re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) re->scheduled = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) spin_unlock(&re->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) * find mirror num
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) for (i = 0; i < re->nzones; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) if (re->zones[i]->device == dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) mirror_num = i + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) logical = re->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) atomic_inc(&dev->reada_in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) ret = reada_tree_block_flagged(fs_info, logical, mirror_num, &eb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) __readahead_hook(fs_info, re, NULL, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) else if (eb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) __readahead_hook(fs_info, re, eb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) if (eb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) free_extent_buffer(eb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) atomic_dec(&dev->reada_in_flight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) reada_extent_put(fs_info, re);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) static void reada_start_machine_worker(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) struct reada_machine_work *rmw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) int old_ioprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) rmw = container_of(work, struct reada_machine_work, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) task_nice_ioprio(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) set_task_ioprio(current, BTRFS_IOPRIO_READA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) __reada_start_machine(rmw->fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) set_task_ioprio(current, old_ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) atomic_dec(&rmw->fs_info->reada_works_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) kfree(rmw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) /* Try to start up to 10k READA requests for a group of devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) static int reada_start_for_fsdevs(struct btrfs_fs_devices *fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) u64 enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) u64 total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) enqueued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) list_for_each_entry(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) if (atomic_read(&device->reada_in_flight) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) MAX_IN_FLIGHT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) enqueued += reada_start_machine_dev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) total += enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) } while (enqueued && total < 10000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) return total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) static void __reada_start_machine(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) u64 enqueued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) enqueued += reada_start_for_fsdevs(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) enqueued += reada_start_for_fsdevs(seed_devs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if (enqueued == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * If everything is already in the cache, this is effectively single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * threaded. To a) not hold the caller for too long and b) to utilize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * more cores, we broke the loop above after 10000 iterations and now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * enqueue to workers to finish it. This will distribute the load to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) * the cores.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) for (i = 0; i < 2; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) reada_start_machine(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (atomic_read(&fs_info->reada_works_cnt) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) BTRFS_MAX_MIRRORS * 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) static void reada_start_machine(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) struct reada_machine_work *rmw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) if (!rmw) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) /* FIXME we cannot handle this properly right now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) rmw->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) atomic_inc(&fs_info->reada_works_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) static void dump_devs(struct btrfs_fs_info *fs_info, int all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) int cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) list_for_each_entry(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) btrfs_debug(fs_info, "dev %lld has %d in flight", device->devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) atomic_read(&device->reada_in_flight));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) struct reada_zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) ret = radix_tree_gang_lookup(&device->reada_zones,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) (void **)&zone, index, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) pr_debug(" zone %llu-%llu elems %llu locked %d devs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) zone->start, zone->end, zone->elems,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) zone->locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) for (j = 0; j < zone->ndevs; ++j) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) pr_cont(" %lld",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) zone->devs[j]->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) if (device->reada_curr_zone == zone)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) pr_cont(" curr off %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) device->reada_next - zone->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) pr_cont("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) index = (zone->end >> PAGE_SHIFT) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) while (all) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) struct reada_extent *re = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) ret = radix_tree_gang_lookup(&device->reada_extents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) (void **)&re, index, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) pr_debug(" re: logical %llu size %u empty %d scheduled %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) re->logical, fs_info->nodesize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) list_empty(&re->extctl), re->scheduled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) for (i = 0; i < re->nzones; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) pr_cont(" zone %llu-%llu devs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) re->zones[i]->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) re->zones[i]->end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) for (j = 0; j < re->zones[i]->ndevs; ++j) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) pr_cont(" %lld",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) re->zones[i]->devs[j]->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) pr_cont("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) index = (re->logical >> PAGE_SHIFT) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) if (++cnt > 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) while (all) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) struct reada_extent *re = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) index, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (!re->scheduled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) index = (re->logical >> PAGE_SHIFT) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) pr_debug("re: logical %llu size %u list empty %d scheduled %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) re->logical, fs_info->nodesize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) list_empty(&re->extctl), re->scheduled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) for (i = 0; i < re->nzones; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) pr_cont(" zone %llu-%llu devs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) re->zones[i]->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) re->zones[i]->end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) for (j = 0; j < re->zones[i]->ndevs; ++j) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) pr_cont(" %lld",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) re->zones[i]->devs[j]->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) pr_cont("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) index = (re->logical >> PAGE_SHIFT) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * interface
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) struct reada_control *btrfs_reada_add(struct btrfs_root *root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) struct btrfs_key *key_start, struct btrfs_key *key_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) struct reada_control *rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) struct extent_buffer *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) static struct btrfs_key max_key = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) .objectid = (u64)-1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) .type = (u8)-1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) .offset = (u64)-1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) rc = kzalloc(sizeof(*rc), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) if (!rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) rc->fs_info = root->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) rc->key_start = *key_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) rc->key_end = *key_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) atomic_set(&rc->elems, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) init_waitqueue_head(&rc->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) kref_init(&rc->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) kref_get(&rc->refcnt); /* one ref for having elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) node = btrfs_root_node(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) start = node->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) generation = btrfs_header_generation(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) free_extent_buffer(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) ret = reada_add_block(rc, start, &max_key, generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) kfree(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) reada_start_machine(root->fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) int btrfs_reada_wait(void *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) struct reada_control *rc = handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) struct btrfs_fs_info *fs_info = rc->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) while (atomic_read(&rc->elems)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (!atomic_read(&fs_info->reada_works_cnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) reada_start_machine(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) 5 * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) kref_put(&rc->refcnt, reada_control_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) int btrfs_reada_wait(void *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) struct reada_control *rc = handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) struct btrfs_fs_info *fs_info = rc->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) while (atomic_read(&rc->elems)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (!atomic_read(&fs_info->reada_works_cnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) reada_start_machine(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) (HZ + 9) / 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) kref_put(&rc->refcnt, reada_control_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) void btrfs_reada_detach(void *handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) struct reada_control *rc = handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) kref_put(&rc->refcnt, reada_control_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * Before removing a device (device replace or device remove ioctls), call this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * function to wait for all existing readahead requests on the device and to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * make sure no one queues more readahead requests for the device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * Must be called without holding neither the device list mutex nor the device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) * replace semaphore, otherwise it will deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) void btrfs_reada_remove_dev(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) struct btrfs_fs_info *fs_info = dev->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) /* Serialize with readahead extent creation at reada_find_extent(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) spin_lock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) set_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) spin_unlock(&fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * There might be readahead requests added to the radix trees which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * were not yet added to the readahead work queue. We need to start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * them and wait for their completion, otherwise we can end up with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * use-after-free problems when dropping the last reference on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * readahead extents and their zones, as they need to access the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * device structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) reada_start_machine(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) btrfs_flush_workqueue(fs_info->readahead_workers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) * If when removing a device (device replace or device remove ioctls) an error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) * happens after calling btrfs_reada_remove_dev(), call this to undo what that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) * function did. This is safe to call even if btrfs_reada_remove_dev() was not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) * called before.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) void btrfs_reada_undo_remove_dev(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) spin_lock(&dev->fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) clear_bit(BTRFS_DEV_STATE_NO_READA, &dev->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) spin_unlock(&dev->fs_info->reada_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }