// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Fusion-io All rights reserved.
 * Copyright (C) 2012 Intel Corp. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"

/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * set when this rbio is sitting in the hash, but it is just a cache
 * of past RMW
 */
#define RBIO_CACHE_BIT		2

/*
 * set when it is safe to trust the stripe_pages for caching
 */
#define RBIO_CACHE_READY_BIT	3

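/* maximum number of rbios kept on the stripe cache LRU before we start pruning */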
#define RBIO_CACHE_SIZE 1024

#define BTRFS_STRIPE_HASH_TABLE_BITS	11

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

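/*
 * The operation an rbio carries out: a write (full stripe or rmw), rebuilding
 * data to service a read that hit a failed stripe, a parity scrub, or
 * rebuilding the stripes of a missing device.
 */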
enum btrfs_rbio_ops {
	BTRFS_RBIO_WRITE,
	BTRFS_RBIO_READ_REBUILD,
	BTRFS_RBIO_PARITY_SCRUB,
	BTRFS_RBIO_REBUILD_MISSING,
};

struct btrfs_raid_bio {
	struct btrfs_fs_info *fs_info;
	struct btrfs_bio *bbio;

	/* while we're doing rmw on a stripe
	 * we put it into a hash table so we can
	 * lock the stripe and merge more rbios
	 * into it.
	 */
	struct list_head hash_list;

	/*
	 * LRU list for the stripe cache
	 */
	struct list_head stripe_cache;

	/*
	 * for scheduling work in the helper threads
	 */
	struct btrfs_work work;

	/*
	 * bio list and bio_list_lock are used
	 * to add more bios into the stripe
	 * in hopes of avoiding the full rmw
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/* also protected by the bio_list_lock, the
	 * plug list is used by the plugging code
	 * to collect partial bios while plugged. The
	 * stripe locking code also uses it to hand off
	 * the stripe lock to the next pending IO
	 */
	struct list_head plug_list;

	/*
	 * flags that tell us if it is safe to
	 * merge with this bio
	 */
	unsigned long flags;

	/* size of each individual stripe on disk */
	int stripe_len;

	/* number of data stripes (no p/q) */
	int nr_data;

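	/* total number of stripes in the full stripe, including parity (P/Q) */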
	int real_stripes;

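	/* number of pages in each individual stripe */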
	int stripe_npages;
	/*
	 * which operation we are carrying out. A parity rebuild done for
	 * a read from higher up is handled differently from a parity
	 * rebuild done as part of rmw.
	 */
	enum btrfs_rbio_ops operation;

	/* first bad stripe */
	int faila;

	/* second bad stripe (for raid6 use) */
	int failb;

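	/* stripe index that is being scrubbed, only used by parity scrub */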
	int scrubp;
	/*
	 * number of pages needed to represent the full
	 * stripe
	 */
	int nr_pages;

	/*
	 * size of all the bios in the bio_list. This
	 * helps us decide if the rbio maps to a full
	 * stripe or not
	 */
	int bio_list_bytes;

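	/* how many references we hold on the fs bio counter; dropped when the rbio completes */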
	int generic_bio_cnt;

	refcount_t refs;

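	/* number of stripe I/Os still in flight for this rbio */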
	atomic_t stripes_pending;

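	/* number of I/O errors hit while reading or writing the stripes */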
	atomic_t error;
	/*
	 * These are arrays of pointers (and bitmaps) that follow this
	 * struct. We allocate the rbio big enough to hold them all and
	 * set up their locations when the rbio is allocated.
	 */

	/* pointers to pages that we allocated for
	 * reading/writing stripes directly from the disk (including P/Q)
	 */
	struct page **stripe_pages;

	/*
	 * pointers to the pages in the bio_list. Stored
	 * here for faster lookup
	 */
	struct page **bio_pages;

	/*
	 * bitmap to record which horizontal stripe has data
	 */
	unsigned long *dbitmap;

	/* allocated with real_stripes-many pointers for finish_*() calls */
	void **finish_pointers;

	/* allocated with stripe_npages-many bits for finish_*() calls */
	unsigned long *finish_pbitmap;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check);
static void scrub_parity_work(struct btrfs_work *work);

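/* Queue the rbio's work item so that work_func runs in the rmw helper threads. */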
static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
	btrfs_init_work(&rbio->work, work_func, NULL, NULL);
	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
}

/*
 * the stripe hash table is used for locking, and to collect
 * bios in hopes of making a full stripe
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, starting with order 4 and can go as high as
	 * order 7 in case lock debugging is turned on.
	 *
	 * Try harder to allocate and fall back to vmalloc to lower the
	 * chance of a failing mount.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	if (x)
		kvfree(x);
	return 0;
}

/*
 * caching an rbio means to copy anything from the
 * bio_pages array into the stripe_pages array. We
 * use the page uptodate bit in the stripe cache array
 * to indicate if it has valid data
 *
 * once the caching is done, we set the cache ready
 * bit.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	char *s;
	char *d;
	int ret;

	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_pages; i++) {
		if (!rbio->bio_pages[i])
			continue;

		s = kmap(rbio->bio_pages[i]);
		d = kmap(rbio->stripe_pages[i]);

		copy_page(d, s);

		kunmap(rbio->bio_pages[i]);
		kunmap(rbio->stripe_pages[i]);
		SetPageUptodate(rbio->stripe_pages[i]);
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/*
 * we hash on the first logical address of the stripe
 */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bbio->raid_map[0];

	/*
	 * we shift down quite a bit. We're using byte
	 * addressing, and most of the lower bits are zeros.
	 * This tends to upset hash_64, and it consistently
	 * returns just one or two different values.
	 *
	 * shifting off the lower bits fixes things.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}

/*
 * stealing an rbio means taking all the uptodate pages from the stripe
 * array in the source rbio and putting them into the destination rbio
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;
	struct page *d;

	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !PageUptodate(s))
			continue;

		d = dest->stripe_pages[i];
		if (d)
			__free_page(d);

		dest->stripe_pages[i] = s;
		src->stripe_pages[i] = NULL;
	}
}

/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination. The victim should
 * be discarded afterwards.
 *
 * must be called with dest->bio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

/*
 * used to prune items that are in the cache. The caller
 * must hold the hash table lock.
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/* hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/* if the bio list isn't empty, this rbio is
		 * still involved in an IO. We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_table, and drop
		 * the corresponding ref
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				refcount_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	if (freeit)
		__free_raid_bio(rbio);
}

/*
 * prune a given rbio from the cache
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	__remove_rbio_from_cache(rbio);
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * remove all cached entries and free the hash table.
 * Used by unmount.
 */
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
{
	if (!info->stripe_hash_table)
		return;
	btrfs_clear_rbio_cache(info);
	kvfree(info->stripe_hash_table);
	info->stripe_hash_table = NULL;
}

/*
 * insert an rbio into the stripe cache. It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * helper function to run the xor_blocks api. It is only
 * able to do MAX_XOR_BLOCKS at a time, so we need to
 * loop through.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}

/*
 * Returns true if the bio list inside this rbio covers an entire stripe (no
 * rmw required).
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long flags;
	unsigned long size = rbio->bio_list_bytes;
	int ret = 1;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);
	if (size != rbio->nr_data * rbio->stripe_len)
		ret = 0;
	BUG_ON(size > rbio->nr_data * rbio->stripe_len);
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);

	return ret;
}

/*
 * returns 1 if it is safe to merge two rbios together.
 * The merging is safe if the two rbios correspond to
 * the same stripe and if they are both going in the same
 * direction (read vs write), and if neither one is
 * locked for final IO
 *
 * The caller is responsible for locking such that
 * rmw_locked is safe to test
 */
static int rbio_can_merge(struct btrfs_raid_bio *last,
			  struct btrfs_raid_bio *cur)
{
	if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
	    test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
		return 0;

	/*
	 * we can't merge with cached rbios, since the
	 * idea is that when we merge the destination
	 * rbio is going to run our IO for us. We can
	 * steal from cached rbios though, other functions
	 * handle that.
	 */
	if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
	    test_bit(RBIO_CACHE_BIT, &cur->flags))
		return 0;

	if (last->bbio->raid_map[0] !=
	    cur->bbio->raid_map[0])
		return 0;

	/* we can't merge with different operations */
	if (last->operation != cur->operation)
		return 0;
	/*
	 * We need to read the full stripe from the drive, check and
	 * repair the parity, and then write the new results back out.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
		return 0;

	if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
		return 0;

	if (last->operation == BTRFS_RBIO_READ_REBUILD) {
		int fa = last->faila;
		int fb = last->failb;
		int cur_fa = cur->faila;
		int cur_fb = cur->failb;

		if (last->faila >= last->failb) {
			fa = last->failb;
			fb = last->faila;
		}

		if (cur->faila >= cur->failb) {
			cur_fa = cur->failb;
			cur_fb = cur->faila;
		}

		if (fa != cur_fa || fb != cur_fb)
			return 0;
	}
	return 1;
}

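/* index into stripe_pages for a given page of a given stripe */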
static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
				  int index)
{
	return stripe * rbio->stripe_npages + index;
}

/*
 * these are just the pages from the rbio array, not from anything
 * the FS sent down to us
 */
static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
				     int index)
{
	return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)];
}

/*
 * helper to index into the pstripe
 */
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	return rbio_stripe_page(rbio, rbio->nr_data, index);
}

/*
 * helper to index into the qstripe, returns null
 * if there is no qstripe
 */
static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
{
	if (rbio->nr_data + 1 == rbio->real_stripes)
		return NULL;
	return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}

/*
 * The first stripe in the table for a logical address
 * has the lock. rbios are added in one of three ways:
 *
 * 1) Nobody has the stripe locked yet. The rbio is given
 * the lock and 0 is returned. The caller must start the IO
 * themselves.
 *
 * 2) Someone has the stripe locked, but we're able to merge
 * with the lock owner. The rbio is freed and the IO will
 * start automatically along with the existing rbio. 1 is returned.
 *
 * 3) Someone has the stripe locked, but we're not able to merge.
 * The rbio is added to the lock owner's plug list, or merged into
 * an rbio already on the plug list. When the lock owner unlocks,
 * the next rbio on the list is run and the IO is started automatically.
 * 1 is returned.
 *
 * If we return 0, the caller still owns the rbio and must continue with
 * IO submission. If we return 1, the caller must assume the rbio has
 * already been freed.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash *h;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	unsigned long flags;
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;

	h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

	spin_lock_irqsave(&h->lock, flags);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
			continue;

		spin_lock(&cur->bio_list_lock);

		/* Can we steal this cached rbio's pages? */
		if (bio_list_empty(&cur->bio_list) &&
		    list_empty(&cur->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
		    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
			list_del_init(&cur->hash_list);
			refcount_dec(&cur->refs);

			steal_rbio(cur, rbio);
			cache_drop = cur;
			spin_unlock(&cur->bio_list_lock);

			goto lockit;
		}

		/* Can we merge into the lock owner? */
		if (rbio_can_merge(cur, rbio)) {
			merge_rbio(cur, rbio);
			spin_unlock(&cur->bio_list_lock);
			freeit = rbio;
			ret = 1;
			goto out;
		}

		/*
		 * We couldn't merge with the running rbio, see if we can merge
		 * with the pending ones. We don't have to check for rmw_locked
		 * because there is no way they are inside finish_rmw right now
		 */
		list_for_each_entry(pending, &cur->plug_list, plug_list) {
			if (rbio_can_merge(pending, rbio)) {
				merge_rbio(pending, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}
		}

		/*
		 * No merging, put us on the tail of the plug list, our rbio
		 * will be started when the currently running rbio unlocks
		 */
		list_add_tail(&rbio->plug_list, &cur->plug_list);
		spin_unlock(&cur->bio_list_lock);
		ret = 1;
		goto out;
	}
lockit:
	refcount_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock_irqrestore(&h->lock, flags);
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		__free_raid_bio(freeit);
	return ret;
}

/*
 * called when an rmw or parity rebuild is completed. If the plug list has
 * more rbios waiting for this stripe, the next one on the list will be
 * started.
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	unsigned long flags;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->fs_info->stripe_hash_table->table + bucket;

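	/* if nobody is waiting on this stripe, try to cache it for later use */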
	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	spin_lock_irqsave(&h->lock, flags);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * if we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		refcount_dec(&rbio->refs);

		/*
		 * we use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			list_add(&next->hash_list, &h->hash_list);
			refcount_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

			if (next->operation == BTRFS_RBIO_READ_REBUILD)
				start_async_work(next, read_rebuild_work);
			else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
				steal_rbio(rbio, next);
				start_async_work(next, read_rebuild_work);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_work);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_parity_work);
			}

			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock_irqrestore(&h->lock, flags);

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) done_nolock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (!keep_cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) remove_rbio_from_cache(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) static void __free_raid_bio(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (!refcount_dec_and_test(&rbio->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) WARN_ON(!list_empty(&rbio->stripe_cache));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) WARN_ON(!list_empty(&rbio->hash_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) WARN_ON(!bio_list_empty(&rbio->bio_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) for (i = 0; i < rbio->nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (rbio->stripe_pages[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) __free_page(rbio->stripe_pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) rbio->stripe_pages[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) btrfs_put_bbio(rbio->bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) kfree(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) struct bio *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) while (cur) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) next = cur->bi_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) cur->bi_next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) cur->bi_status = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) bio_endio(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) cur = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * this frees the rbio, then runs through all the bios in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * bio_list and calls end_io on them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) struct bio *cur = bio_list_get(&rbio->bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) struct bio *extra;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) if (rbio->generic_bio_cnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * At this moment, rbio->bio_list is empty.  However, since rbio does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * always have RBIO_RMW_LOCKED_BIT set and is still linked on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * hash list, rbio may be merged with others so that rbio->bio_list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * becomes non-empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * Once unlock_stripe() is done, rbio->bio_list will not be updated any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * more and we can call bio_endio() on all queued bios.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) unlock_stripe(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) extra = bio_list_get(&rbio->bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) __free_raid_bio(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) rbio_endio_bio_list(cur, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) if (extra)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) rbio_endio_bio_list(extra, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * end io function used by finish_rmw. When we finally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * get here, we've written a full stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) static void raid_write_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) struct btrfs_raid_bio *rbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) blk_status_t err = bio->bi_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) int max_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) fail_bio_stripe(rbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (!atomic_dec_and_test(&rbio->stripes_pending))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) err = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) /* OK, all the stripe writes we submitted have completed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) 0 : rbio->bbio->max_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (atomic_read(&rbio->error) > max_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) err = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) rbio_orig_end_io(rbio, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * the read/modify/write code wants to use the original bio for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * any pages it included, and then use the rbio for everything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * else. This function decides if a given index (stripe number)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * and page number in that stripe fall inside the original bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) * or the rbio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * if you set bio_list_only, you'll get a NULL back for any ranges
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * that are outside the bio_list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * This doesn't take any refs on anything, you get a bare page pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * and the caller must bump refs as required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) * You must call index_rbio_pages once before you can trust
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) * the answers from this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) int index, int pagenr, int bio_list_only)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) int chunk_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) struct page *p = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
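	/*
	 * bio_pages and stripe_pages are flat arrays covering every stripe;
	 * compute this page's index from the stripe number and page number.
	 */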
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) spin_lock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) p = rbio->bio_pages[chunk_page];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) spin_unlock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) if (p || bio_list_only)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) return rbio->stripe_pages[chunk_page];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * number of pages we need for the entire stripe across all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * drives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * allocation and initial setup for the btrfs_raid_bio.  Note that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * this does not allocate any pages for rbio->stripe_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) struct btrfs_bio *bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) u64 stripe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) int nr_data = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) int num_pages = rbio_nr_pages(stripe_len, real_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) rbio = kzalloc(sizeof(*rbio) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) sizeof(*rbio->stripe_pages) * num_pages +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) sizeof(*rbio->bio_pages) * num_pages +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) sizeof(*rbio->finish_pointers) * real_stripes +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) sizeof(*rbio->finish_pbitmap) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) BITS_TO_LONGS(stripe_npages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (!rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) bio_list_init(&rbio->bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) INIT_LIST_HEAD(&rbio->plug_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) spin_lock_init(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) INIT_LIST_HEAD(&rbio->stripe_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) INIT_LIST_HEAD(&rbio->hash_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) rbio->bbio = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) rbio->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) rbio->stripe_len = stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) rbio->nr_pages = num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) rbio->real_stripes = real_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) rbio->stripe_npages = stripe_npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) rbio->faila = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) rbio->failb = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) refcount_set(&rbio->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) atomic_set(&rbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) atomic_set(&rbio->stripes_pending, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * the stripe_pages, bio_pages, etc arrays point to the extra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * memory we allocated past the end of the rbio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) p = rbio + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) #define CONSUME_ALLOC(ptr, count) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) ptr = p; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) p = (unsigned char *)p + sizeof(*(ptr)) * (count); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) CONSUME_ALLOC(rbio->stripe_pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) CONSUME_ALLOC(rbio->bio_pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) #undef CONSUME_ALLOC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) nr_data = real_stripes - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) nr_data = real_stripes - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) rbio->nr_data = nr_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) return rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) /* allocate pages for all the stripes in the bio, including parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) for (i = 0; i < rbio->nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) if (rbio->stripe_pages[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) rbio->stripe_pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) /* only allocate pages for p/q stripes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
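	/* Data stripes come first; parity (P/Q) pages start at stripe nr_data. */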
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) for (; i < rbio->nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) if (rbio->stripe_pages[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) rbio->stripe_pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * add a single page from a specific stripe into our list of bios for IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * This will try to merge into existing bios if possible, and returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * zero if all went well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) struct bio_list *bio_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) int stripe_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) unsigned long page_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) unsigned long bio_max_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) struct bio *last = bio_list->tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) struct btrfs_bio_stripe *stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) u64 disk_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) stripe = &rbio->bbio->stripes[stripe_nr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) disk_start = stripe->physical + (page_index << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) /* if the device is missing, just fail this stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) if (!stripe->dev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) return fail_rbio_index(rbio, stripe_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) /* see if we can add this page onto our existing bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (last) {
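		/* bi_sector is in 512-byte sectors; shift by 9 for a byte offset. */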
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) u64 last_end = (u64)last->bi_iter.bi_sector << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) last_end += last->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * we can't merge these if they are from different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * devices or if they are not contiguous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) if (last_end == disk_start && !last->bi_status &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) last->bi_disk == stripe->dev->bdev->bd_disk &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) last->bi_partno == stripe->dev->bdev->bd_partno) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) ret = bio_add_page(last, page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (ret == PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) /* put a new bio on the list */
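	/* Size the new bio for up to bio_max_len worth of pages, at least one. */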
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) btrfs_io_bio(bio)->device = stripe->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) bio->bi_iter.bi_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) bio_set_dev(bio, stripe->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) bio->bi_iter.bi_sector = disk_start >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) bio_add_page(bio, page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) bio_list_add(bio_list, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * while we're doing the read/modify/write cycle, we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * have errors in reading pages off the disk. This checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * for errors and if we're not able to read the page it'll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * trigger parity reconstruction. The rmw will be finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * after we've reconstructed the failed stripes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) if (rbio->faila >= 0 || rbio->failb >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) BUG_ON(rbio->faila == rbio->real_stripes - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) __raid56_parity_recover(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) finish_rmw(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * helper function to walk our bio list and populate the bio_pages array with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * the result. This seems expensive, but it is faster than constantly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) * searching through the bio list as we set up the IO in finish_rmw or stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * reconstruction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * This must be called before you trust the answers from page_in_rbio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) static void index_rbio_pages(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) unsigned long stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) unsigned long page_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) spin_lock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) bio_list_for_each(bio, &rbio->bio_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) struct bio_vec bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) struct bvec_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) start = (u64)bio->bi_iter.bi_sector << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) stripe_offset = start - rbio->bbio->raid_map[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) page_index = stripe_offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
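		/*
		 * A cloned bio may have had its iterator advanced; use the
		 * saved iter so every segment of this bio is visited.
		 */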
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) if (bio_flagged(bio, BIO_CLONED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) bio->bi_iter = btrfs_io_bio(bio)->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) bio_for_each_segment(bvec, bio, iter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) rbio->bio_pages[page_index + i] = bvec.bv_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) spin_unlock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * this is called in one of two situations. We either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * have a full stripe from the higher layers, or we've read all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * the missing bits off disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * This will calculate the parity and then send down any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * changed blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct btrfs_bio *bbio = rbio->bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) void **pointers = rbio->finish_pointers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) int nr_data = rbio->nr_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) int stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) int pagenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) bool has_qstripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) struct bio_list bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) bio_list_init(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
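	/* One stripe beyond the data means RAID5 (P only); two means RAID6 (P and Q). */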
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) if (rbio->real_stripes - rbio->nr_data == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) has_qstripe = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) else if (rbio->real_stripes - rbio->nr_data == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) has_qstripe = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * At this point we either have a full stripe, or we've read the full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * stripe from the drive.  Recalculate the parity and write the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * results.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * We're not allowed to add any new bios to the bio list here; anyone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) * else that wants to change this stripe needs to do their own rmw.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) spin_lock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) spin_unlock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) atomic_set(&rbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * now that we've set rmw_locked, run through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * bio list one last time and map the page pointers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * We don't cache full rbios because we're assuming
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * the higher layers are unlikely to use this area of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * the disk again soon. If they do use it again,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) * hopefully they will send another full bio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) index_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (!rbio_is_full(rbio))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) cache_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /* first collect one page from each data stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) for (stripe = 0; stripe < nr_data; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) p = page_in_rbio(rbio, stripe, pagenr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) pointers[stripe] = kmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) /* then add the parity stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) p = rbio_pstripe_page(rbio, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) SetPageUptodate(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) pointers[stripe++] = kmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) if (has_qstripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * raid6, add the qstripe and call the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * library function to fill in our p/q
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) p = rbio_qstripe_page(rbio, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) SetPageUptodate(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) pointers[stripe++] = kmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) pointers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /* raid5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) copy_page(pointers[nr_data], pointers[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) for (stripe = 0; stripe < rbio->real_stripes; stripe++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) * time to start writing. Make bios for everything from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) * higher layers (the bio_list in our rbio) and our p/q. Ignore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) * everything else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (stripe < rbio->nr_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) page = page_in_rbio(rbio, stripe, pagenr, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) page = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) ret = rbio_add_io_page(rbio, &bio_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) page, stripe, pagenr, rbio->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) if (likely(!bbio->num_tgtdevs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) goto write_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
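	/*
	 * A device replace is running; duplicate the writes to the
	 * corresponding replace target devices as well.
	 */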
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) if (!bbio->tgtdev_map[stripe])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) if (stripe < rbio->nr_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) page = page_in_rbio(rbio, stripe, pagenr, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) page = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) ret = rbio_add_io_page(rbio, &bio_list, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) rbio->bbio->tgtdev_map[stripe],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) pagenr, rbio->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) write_data:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) while ((bio = bio_list_pop(&bio_list))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) bio->bi_private = rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) bio->bi_end_io = raid_write_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) bio->bi_opf = REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) rbio_orig_end_io(rbio, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) while ((bio = bio_list_pop(&bio_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * helper to find the stripe number for a given bio. Used to figure out which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) * stripe has failed. This expects the bio to correspond to a physical disk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) * so it looks up based on physical sector numbers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) static int find_bio_stripe(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) u64 physical = bio->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) struct btrfs_bio_stripe *stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) physical <<= 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) for (i = 0; i < rbio->bbio->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) stripe = &rbio->bbio->stripes[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (in_range(physical, stripe->physical, rbio->stripe_len) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) stripe->dev->bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) bio->bi_disk == stripe->dev->bdev->bd_disk &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) bio->bi_partno == stripe->dev->bdev->bd_partno) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) * helper to find the stripe number for a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) * bio (before mapping). Used to figure out which stripe has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) * failed. This looks up based on logical block numbers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) u64 logical = (u64)bio->bi_iter.bi_sector << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) for (i = 0; i < rbio->nr_data; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) u64 stripe_start = rbio->bbio->raid_map[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (in_range(logical, stripe_start, rbio->stripe_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * returns -EIO if we had too many failures
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) spin_lock_irqsave(&rbio->bio_list_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) /* we already know this stripe is bad, move on */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) if (rbio->faila == failed || rbio->failb == failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) if (rbio->faila == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) /* first failure on this rbio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) rbio->faila = failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) atomic_inc(&rbio->error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) } else if (rbio->failb == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) /* second failure on this rbio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) rbio->failb = failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) atomic_inc(&rbio->error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * helper to fail a stripe based on a physical disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) * bio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) int failed = find_bio_stripe(rbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) if (failed < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) return fail_rbio_index(rbio, failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * this sets each page in the bio uptodate. It should only be used on private
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * rbio pages, nothing that comes in from the higher layers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) static void set_bio_pages_uptodate(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) struct bio_vec *bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) struct bvec_iter_all iter_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) ASSERT(!bio_flagged(bio, BIO_CLONED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) bio_for_each_segment_all(bvec, bio, iter_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) SetPageUptodate(bvec->bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * end io for the read phase of the rmw cycle. All the bios here are physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * stripe bios we've read from the disk so we can recalculate the parity of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * This will usually kick off finish_rmw once all the bios are read in, but it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * may trigger parity reconstruction if we had any errors along the way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) static void raid_rmw_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) struct btrfs_raid_bio *rbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) if (bio->bi_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) fail_bio_stripe(rbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) set_bio_pages_uptodate(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (!atomic_dec_and_test(&rbio->stripes_pending))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) * this will normally call finish_rmw to start our write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * but if there are any failed stripes we'll reconstruct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * from parity first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) validate_rbio_for_rmw(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) rbio_orig_end_io(rbio, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * the stripe must be locked by the caller. It will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) * unlock after all the writes are done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) int bios_to_read = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) struct bio_list bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) int pagenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) int stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) bio_list_init(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) ret = alloc_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) index_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) atomic_set(&rbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) * build a list of bios to read all the missing parts of this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) * stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) for (stripe = 0; stripe < rbio->nr_data; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) * we want to find all the pages missing from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) * the rbio and read them from the disk. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) * page_in_rbio finds a page in the bio list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) * we don't need to read it off the stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) page = page_in_rbio(rbio, stripe, pagenr, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) page = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * the bio cache may have handed us an uptodate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) * page. If so, be happy and use it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if (PageUptodate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) ret = rbio_add_io_page(rbio, &bio_list, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) stripe, pagenr, rbio->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) bios_to_read = bio_list_size(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) if (!bios_to_read) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) * this can happen if others have merged with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) * us; it means there is nothing left to read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) * But if there are missing devices it may not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) * safe to do the full stripe write yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * the bbio may be freed once we submit the last bio. Make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * not to touch it after that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) atomic_set(&rbio->stripes_pending, bios_to_read);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) while ((bio = bio_list_pop(&bio_list))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) bio->bi_private = rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) bio->bi_end_io = raid_rmw_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) bio->bi_opf = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) /* the actual write will happen once the reads are done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) rbio_orig_end_io(rbio, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) while ((bio = bio_list_pop(&bio_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) validate_rbio_for_rmw(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * if the upper layers pass in a full stripe, we thank them by only allocating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) * enough pages to hold the parity, and sending it all down quickly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) static int full_stripe_write(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) ret = alloc_rbio_parity_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) __free_raid_bio(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
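	/*
	 * lock_stripe_add() returns 0 when we now own the stripe lock;
	 * otherwise this rbio was merged into or queued behind an existing
	 * holder and its work will be finished there.
	 */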
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) ret = lock_stripe_add(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) finish_rmw(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) * partial stripe writes get handed over to async helpers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) * We're really hoping to merge a few more writes into this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) * rbio before calculating new parity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) static int partial_stripe_write(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) ret = lock_stripe_add(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) start_async_work(rbio, rmw_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * sometimes while we were reading from the drive to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * recalculate parity, enough new bios come in to create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * a full stripe. So we do a check here to see if we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * go directly to finish_rmw
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) /* head off into rmw land if we don't have a full stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) if (!rbio_is_full(rbio))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) return partial_stripe_write(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) return full_stripe_write(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) * We use plugging callbacks to collect full stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * Any time we get a partial stripe write while plugged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * we collect it into a list. When the unplug comes down,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * we sort the list by logical block number and merge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * everything we can into the same rbios
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) struct btrfs_plug_cb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) struct blk_plug_cb cb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) struct btrfs_fs_info *info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) struct list_head rbio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) struct btrfs_work work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) * rbios on the plug list are sorted for easier merging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) plug_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) plug_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) if (a_sector < b_sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) if (a_sector > b_sector)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) static void run_plug(struct btrfs_plug_cb *plug)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) struct btrfs_raid_bio *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) struct btrfs_raid_bio *last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * sort our plug list then try to merge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) * everything we can in hopes of creating full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) * stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) list_sort(NULL, &plug->rbio_list, plug_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) while (!list_empty(&plug->rbio_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) cur = list_entry(plug->rbio_list.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) struct btrfs_raid_bio, plug_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) list_del_init(&cur->plug_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) if (rbio_is_full(cur)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) /* we have a full stripe, send it down */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) ret = full_stripe_write(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if (last) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) if (rbio_can_merge(last, cur)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) merge_rbio(last, cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) __free_raid_bio(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) __raid56_parity_write(last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) last = cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (last) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) __raid56_parity_write(last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) kfree(plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) * if the unplug comes from schedule, we have to push the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * work off to a helper thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) static void unplug_work(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) struct btrfs_plug_cb *plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) plug = container_of(work, struct btrfs_plug_cb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) run_plug(plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) struct btrfs_plug_cb *plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) plug = container_of(cb, struct btrfs_plug_cb, cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) if (from_schedule) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) btrfs_queue_work(plug->info->rmw_workers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) &plug->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) run_plug(plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) }
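/*
 * The deferral above matters because the implicit unplug done inside
 * schedule() is no place for heavy lifting: run_plug() allocates pages,
 * takes stripe locks and submits bios, so when from_schedule is set the
 * work is bounced to the rmw_workers workqueue instead.
 */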
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) * our main entry point for writes from the rest of the FS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) struct btrfs_bio *bbio, u64 stripe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) struct btrfs_plug_cb *plug = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) struct blk_plug_cb *cb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) rbio = alloc_rbio(fs_info, bbio, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (IS_ERR(rbio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) return PTR_ERR(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) bio_list_add(&rbio->bio_list, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) rbio->bio_list_bytes = bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) rbio->operation = BTRFS_RBIO_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) btrfs_bio_counter_inc_noblocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) rbio->generic_bio_cnt = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) * don't plug on full rbios, just get them out the door
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) * as quickly as we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) if (rbio_is_full(rbio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) ret = full_stripe_write(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) if (cb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) plug = container_of(cb, struct btrfs_plug_cb, cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (!plug->info) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) plug->info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) INIT_LIST_HEAD(&plug->rbio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) list_add_tail(&rbio->plug_list, &plug->rbio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) ret = __raid56_parity_write(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
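/*
 * For orientation: the usual caller of raid56_parity_write() is the bio
 * mapping code in volumes.c, which routes writes to RAID5/6 chunks here
 * instead of submitting them to a single device.  (The exact call site is
 * not shown in this file.)
 */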
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * all parity reconstruction happens here. We've read in everything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * we can find from the drives and this does the heavy lifting of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * sorting the good from the bad.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) int pagenr, stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) void **pointers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) int faila = -1, failb = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) blk_status_t err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) if (!pointers) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) err = BLK_STS_RESOURCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) goto cleanup_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) faila = rbio->faila;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) failb = rbio->failb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) spin_lock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) spin_unlock_irq(&rbio->bio_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) }
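/*
 * Setting RBIO_RMW_LOCKED_BIT here is what keeps new bios from being
 * merged into this rbio while the reconstruction below rewrites the
 * failed pages in place.
 */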
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) index_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * When doing parity scrub, we only care about the horizontal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * stripes (pages) that are marked in the dbitmap; skip the rest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) !test_bit(pagenr, rbio->dbitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) /* setup our array of pointers with pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * from each stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * if we're rebuilding a read, we have to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) * pages from the bio list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) (stripe == faila || stripe == failb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) page = page_in_rbio(rbio, stripe, pagenr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) page = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) pointers[stripe] = kmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) /* all raid6 handling here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * single failure, rebuild from parity raid5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * style
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) if (failb < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (faila == rbio->nr_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) * Just the P stripe has failed, without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) * a bad data or Q stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) * TODO, we should redo the xor here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) err = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) * a single failure in raid6 is rebuilt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) * in the pstripe code below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) goto pstripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) /* make sure our ps and qs are in order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) if (faila > failb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) swap(faila, failb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) /* if the q stripe failed, do a pstripe reconstruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) * from the xors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * If both the q stripe and the P stripe failed, we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * here because of a crc mismatch and we can't give them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * the data they want.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if (rbio->bbio->raid_map[faila] ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) RAID5_P_STRIPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) err = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * otherwise we have one bad data stripe and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) * a good P stripe. raid5!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) goto pstripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886)
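/*
 * Two failures from here on: raid6_datap_recov() handles one failed data
 * stripe plus the failed P stripe (rebuilding the data from Q), while
 * raid6_2data_recov() rebuilds two failed data stripes from P and Q.
 * Both helpers come from the kernel's lib/raid6 code.
 */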
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) raid6_datap_recov(rbio->real_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) PAGE_SIZE, faila, pointers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) raid6_2data_recov(rbio->real_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) PAGE_SIZE, faila, failb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) pointers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) /* rebuild from P stripe here (raid5 or raid6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) BUG_ON(failb != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) pstripe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) /* Copy parity block into failed block to start with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) copy_page(pointers[faila], pointers[rbio->nr_data]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) /* rearrange the pointer array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) p = pointers[faila];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) pointers[stripe] = pointers[stripe + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) pointers[rbio->nr_data - 1] = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) /* xor in the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) }
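/*
 * Worked example of the rebuild above with nr_data == 3 and faila == 1:
 * pointers starts as [D0, D1(bad), D2, P].  copy_page() puts a copy of P
 * into the buffer of the failed slot, the rotation turns the array into
 * [D0, D2, Pcopy], and run_xor() with two sources computes
 * Pcopy ^= D0 ^ D2, which is exactly D1 because P = D0 ^ D1 ^ D2.  Since
 * Pcopy still lives in the page mapped for stripe faila, the rebuilt data
 * ends up in the right place.
 */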
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) /* if we're doing this rebuild as part of an rmw, go through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * and set all of our private rbio pages in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) * failed stripes as uptodate. This way finish_rmw will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * know they can be trusted. If this was a read reconstruction,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * other endio functions will fiddle the uptodate bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if (rbio->operation == BTRFS_RBIO_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) for (i = 0; i < rbio->stripe_npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) if (faila != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) page = rbio_stripe_page(rbio, faila, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) if (failb != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) page = rbio_stripe_page(rbio, failb, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) * if we're rebuilding a read, we have to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) * pages from the bio list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) (stripe == faila || stripe == failb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) page = page_in_rbio(rbio, stripe, pagenr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) page = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) kunmap(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) err = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) kfree(pointers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) cleanup_io:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) * valid rbio which is consistent with the on-disk content, thus such a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) * valid rbio can be cached to avoid further disk reads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) * - In case of two failures, where rbio->failb != -1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) * Do not cache this rbio since the above read reconstruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) * (raid6_datap_recov() or raid6_2data_recov()) may have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) * changed some content of stripes which are not identical to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) * on-disk content any more, otherwise, a later write/recover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) * may steal stripe_pages from this rbio and end up with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) * corruptions or rebuild failures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * - In case of single failure, where rbio->failb == -1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) * Cache this rbio iff the above read reconstruction is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) * executed without problems.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) if (err == BLK_STS_OK && rbio->failb < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) cache_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) rbio_orig_end_io(rbio, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) } else if (err == BLK_STS_OK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) rbio->faila = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) rbio->failb = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) if (rbio->operation == BTRFS_RBIO_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) finish_rmw(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) finish_parity_scrub(rbio, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) rbio_orig_end_io(rbio, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) * This is called only for stripes we've read from disk to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) * reconstruct the parity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) static void raid_recover_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) struct btrfs_raid_bio *rbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) * we only read stripe pages off the disk, set them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) * up to date if there were no errors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (bio->bi_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) fail_bio_stripe(rbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) set_bio_pages_uptodate(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) if (!atomic_dec_and_test(&rbio->stripes_pending))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
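/*
 * bbio->max_errors is the number of stripe failures the profile can
 * tolerate: 1 for RAID5, 2 for RAID6.  More failures than that and there
 * is nothing left to rebuild from.
 */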
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) rbio_orig_end_io(rbio, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) __raid_recover_end_io(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * reads everything we need off the disk to reconstruct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * the parity. endio handlers trigger final reconstruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * when the IO is done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) * This is used both for reads from the higher layers and for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) * parity construction required to finish an rmw cycle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) int bios_to_read = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) struct bio_list bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) int pagenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) int stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) bio_list_init(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) ret = alloc_rbio_pages(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) atomic_set(&rbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) * read everything that hasn't failed. Thanks to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) * stripe cache, it is possible that some or all of these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) * pages are going to be uptodate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) if (rbio->faila == stripe || rbio->failb == stripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) atomic_inc(&rbio->error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * the rmw code may have already read this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * page in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) p = rbio_stripe_page(rbio, stripe, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) if (PageUptodate(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) ret = rbio_add_io_page(rbio, &bio_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) rbio_stripe_page(rbio, stripe, pagenr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) stripe, pagenr, rbio->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) bios_to_read = bio_list_size(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) if (!bios_to_read) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) * we might have no bios to read just because the pages were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * already up to date, or because the devices were gone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) __raid_recover_end_io(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * the bbio may be freed once we submit the last bio. Make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * not to touch it after that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) atomic_set(&rbio->stripes_pending, bios_to_read);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) while ((bio = bio_list_pop(&bio_list))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) bio->bi_private = rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) bio->bi_end_io = raid_recover_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) bio->bi_opf = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) cleanup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) rbio_orig_end_io(rbio, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) while ((bio = bio_list_pop(&bio_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) * the main entry point for reads from the higher layers. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) * is really only called when the normal read path had a failure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) * so we assume the bio they send down corresponds to a failed part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) * of the drive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) struct btrfs_bio *bbio, u64 stripe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) int mirror_num, int generic_io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (generic_io) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) ASSERT(bbio->mirror_num == mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) btrfs_io_bio(bio)->mirror_num = mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) rbio = alloc_rbio(fs_info, bbio, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) if (IS_ERR(rbio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) if (generic_io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) return PTR_ERR(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) rbio->operation = BTRFS_RBIO_READ_REBUILD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) bio_list_add(&rbio->bio_list, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) rbio->bio_list_bytes = bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) rbio->faila = find_logical_bio_stripe(rbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) if (rbio->faila == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) "%s could not find the bad stripe in raid56, so we cannot recover it (bio has logical %llu len %llu, bbio has map_type %llu)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) __func__, (u64)bio->bi_iter.bi_sector << 9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) (u64)bio->bi_iter.bi_size, bbio->map_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) if (generic_io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) kfree(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (generic_io) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) btrfs_bio_counter_inc_noblocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) rbio->generic_bio_cnt = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) btrfs_get_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
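/*
 * generic_io distinguishes a normal read-repair retry coming from the
 * upper layers (the rbio takes over the caller's bbio reference and is
 * accounted against the fs bio counter) from internal users such as
 * scrub, which keep their own bbio reference, so an extra one is taken
 * for the rbio above.
 */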
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) * Loop retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * for 'mirror_num == 2', reconstruct from all other stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * for 'mirror_num > 2', select a stripe to fail on every retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) if (mirror_num > 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * 'mirror_num == 3' is to fail the p stripe and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) * reconstruct from the q stripe. 'mirror_num > 3' is to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) * fail a data stripe and reconstruct from the p+q stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) rbio->failb = rbio->real_stripes - (mirror_num - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) ASSERT(rbio->failb > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) if (rbio->failb <= rbio->faila)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) rbio->failb--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
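/*
 * Example of the mapping above: with real_stripes == 6 (4 data + P + Q),
 * mirror_num == 3 gives failb = 6 - 2 = 4, which is the P stripe, so we
 * rebuild from Q.  mirror_num == 4 gives failb = 3, a data stripe, and
 * the failb-- shifts it down one when it would collide with (or fall at
 * or below) the stripe already failed as faila, so each retry fails a
 * different stripe.
 */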
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) ret = lock_stripe_add(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * __raid56_parity_recover will end the bio with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * any errors it hits. We don't want to return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * its error value up the stack because our caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * will end up calling bio_endio with any nonzero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) __raid56_parity_recover(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * Our rbio has been added to the list of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * rbios that will be handled after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) * current lock owner is done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) static void rmw_work(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) rbio = container_of(work, struct btrfs_raid_bio, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) raid56_rmw_stripe(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) static void read_rebuild_work(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) rbio = container_of(work, struct btrfs_raid_bio, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) __raid56_parity_recover(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * The following code is used to scrub/replace the parity stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * Caller must have already increased bio_counter for getting @bbio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * Note: We need to make sure that all the pages added to the scrub/replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * raid bio are correct and do not change during the scrub/replace, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) * they hold only metadata or file data protected by a checksum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) struct btrfs_raid_bio *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) struct btrfs_bio *bbio, u64 stripe_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) struct btrfs_device *scrub_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) unsigned long *dbitmap, int stripe_nsectors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) rbio = alloc_rbio(fs_info, bbio, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (IS_ERR(rbio))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) bio_list_add(&rbio->bio_list, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) * This is a special bio which is used to hold the completion handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) * and to make the scrub rbio similar to the other types
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) ASSERT(!bio->bi_iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) * to the end position, so this search can start from the first parity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) * stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) if (bbio->stripes[i].dev == scrub_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) rbio->scrubp = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) ASSERT(i < rbio->real_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) /* For now we only support the case where sectorsize equals the page size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) ASSERT(fs_info->sectorsize == PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) ASSERT(rbio->stripe_npages == stripe_nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) * We have already increased the bio_counter when getting the bbio; record
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * that here so the counter is dropped again at rbio_orig_end_io().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) rbio->generic_bio_cnt = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) return rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) }
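/*
 * The scrub code is expected to attach the data pages it has already
 * verified with raid56_add_scrub_pages() below and then submit the rbio
 * through the scrub submission helper later in this file; only then are
 * the missing pages read and the parity checked.
 */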
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) /* Used for both parity scrub and missing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) u64 logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) int stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) ASSERT(logical >= rbio->bbio->raid_map[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) rbio->stripe_len * rbio->nr_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) index = stripe_offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) rbio->bio_pages[index] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) }
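/*
 * The index above is a page offset into the flat data area of the full
 * stripe.  For example, assuming 4K pages and the usual 64K stripe_len, a
 * page at logical raid_map[0] + 68K gets index 17, i.e. the second page
 * of the second data stripe, matching the ordering used by
 * index_rbio_pages()/page_in_rbio().
 */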
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * We only scrub the parity for the horizontal stripes where we have correct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) * data, so we don't need to allocate every page of every stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) for (i = 0; i < rbio->real_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) index = i * rbio->stripe_npages + bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (rbio->stripe_pages[index])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) rbio->stripe_pages[index] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) int need_check)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) struct btrfs_bio *bbio = rbio->bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) void **pointers = rbio->finish_pointers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) unsigned long *pbitmap = rbio->finish_pbitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) int nr_data = rbio->nr_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) int stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) int pagenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) bool has_qstripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) struct page *p_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) struct page *q_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) struct bio_list bio_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) int is_replace = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) bio_list_init(&bio_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337)
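/*
 * The number of parity stripes identifies the profile: real_stripes -
 * nr_data is 1 for RAID5 (P only) and 2 for RAID6 (P and Q); anything
 * else would be a bug.
 */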
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (rbio->real_stripes - rbio->nr_data == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) has_qstripe = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) else if (rbio->real_stripes - rbio->nr_data == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) has_qstripe = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
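/*
 * If a dev-replace is running on the device whose parity we scrub,
 * snapshot the requested pages now: bits get cleared from dbitmap below
 * for parity that turns out to be correct, but every requested page
 * still has to be copied to the replace target at the end.
 */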
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) is_replace = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) * The higher layers (the scrubber) are unlikely to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) * this area of the disk again soon, so don't cache it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) if (!need_check)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) goto writeback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) if (!p_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) SetPageUptodate(p_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (has_qstripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) /* RAID6, allocate and map temp space for the Q stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) if (!q_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) __free_page(p_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) goto cleanup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) SetPageUptodate(q_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) pointers[rbio->real_stripes - 1] = kmap(q_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) atomic_set(&rbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) /* Map the parity stripe just once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) pointers[nr_data] = kmap(p_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) void *parity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) /* first collect one page from each data stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) for (stripe = 0; stripe < nr_data; stripe++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) p = page_in_rbio(rbio, stripe, pagenr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) pointers[stripe] = kmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) if (has_qstripe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) /* RAID6, call the library function to fill in our P/Q */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) pointers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) /* raid5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) copy_page(pointers[nr_data], pointers[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) }
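/*
 * Either way pointers[nr_data] now holds the parity recomputed from the
 * data pages (for RAID6 the Q result lands in the temporary q_page
 * mapped above), ready to be compared with what is on disk.
 */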
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) /* Compare the parity being scrubbed with the freshly computed one and repair it if they differ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) parity = kmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) copy_page(parity, pointers[rbio->scrubp]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) /* Parity is correct, no need to write it back */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) bitmap_clear(rbio->dbitmap, pagenr, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) kunmap(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) for (stripe = 0; stripe < nr_data; stripe++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) kunmap(p_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) __free_page(p_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) if (q_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) kunmap(q_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) __free_page(q_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) writeback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * Time to start writing. Make bios only for the parity pages of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) * stripe we are scrubbing (and, further below, for the dev-replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) * target). Everything else is ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) */
	for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
		struct page *page;

		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
		ret = rbio_add_io_page(rbio, &bio_list,
				       page, rbio->scrubp, pagenr, rbio->stripe_len);
		if (ret)
			goto cleanup;
	}

	if (!is_replace)
		goto submit_write;

	for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
		struct page *page;

		page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
		ret = rbio_add_io_page(rbio, &bio_list, page,
				       bbio->tgtdev_map[rbio->scrubp],
				       pagenr, rbio->stripe_len);
		if (ret)
			goto cleanup;
	}

submit_write:
	nr_data = bio_list_size(&bio_list);
	if (!nr_data) {
		/* Every parity page was correct, nothing to write back */
		rbio_orig_end_io(rbio, BLK_STS_OK);
		return;
	}

	atomic_set(&rbio->stripes_pending, nr_data);

	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid_write_end_io;
		bio->bi_opf = REQ_OP_WRITE;

		submit_bio(bio);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);
}

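/*
 * Bitmap-driven writeback sketch (illustrative only; the helpers named
 * below are hypothetical, not this driver's API).  The check-and-repair
 * loop above clears the dbitmap bit of every parity page that already
 * matched the recomputed parity, so the writeback loops only submit the
 * pages that actually changed:
 *
 *	DECLARE_BITMAP(need_write, 64);
 *	unsigned int pagenr;
 *
 *	bitmap_fill(need_write, 64);
 *	for (pagenr = 0; pagenr < 64; pagenr++)
 *		if (parity_page_matches(pagenr))
 *			bitmap_clear(need_write, pagenr, 1);
 *
 *	for_each_set_bit(pagenr, need_write, 64)
 *		submit_write_for_page(pagenr);
 */
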
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
{
	if (stripe >= 0 && stripe < rbio->nr_data)
		return 1;
	return 0;
}

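/*
 * Layout note (my reading of the stripe indexing, not a quote from this
 * file): within an rbio the data stripes come first and the parity
 * stripes last.  For a 4-device RAID6 full stripe, for example:
 *
 *	stripe 0..1  ->  data      (is_data_stripe() returns 1)
 *	stripe 2     ->  P parity
 *	stripe 3     ->  Q parity  (only present when has_qstripe)
 *
 * i.e. nr_data == 2 and real_stripes == 4 in that case.
 */
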
/*
 * While we're doing the parity check and repair, we could have errors
 * in reading pages off the disk.  This checks for errors and if we're
 * not able to read the page it'll trigger parity reconstruction.  The
 * parity scrub will be finished after we've reconstructed the failed
 * stripes.
 */
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
{
	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
		goto cleanup;

	if (rbio->faila >= 0 || rbio->failb >= 0) {
		int dfail = 0, failp = -1;

		if (is_data_stripe(rbio, rbio->faila))
			dfail++;
		else if (is_parity_stripe(rbio->faila))
			failp = rbio->faila;

		if (is_data_stripe(rbio, rbio->failb))
			dfail++;
		else if (is_parity_stripe(rbio->failb))
			failp = rbio->failb;

		/*
		 * Because we cannot use the parity stripe that is being
		 * scrubbed to repair data, our repair capability is reduced
		 * by one.  (In the RAID5 case we cannot repair anything.)
		 */
		if (dfail > rbio->bbio->max_errors - 1)
			goto cleanup;

		/*
		 * If all the data stripes are good and only a parity stripe
		 * is bad, just repair the parity.
		 */
		if (dfail == 0) {
			finish_parity_scrub(rbio, 0);
			return;
		}

		/*
		 * At this point we have one corrupted data stripe and one
		 * corrupted parity stripe on RAID6.  If the corrupted parity
		 * happens to be the one we are scrubbing, we can luckily use
		 * the other (good) parity to repair the data; otherwise the
		 * data stripe cannot be repaired.
		 */
		if (failp != rbio->scrubp)
			goto cleanup;

		__raid_recover_end_io(rbio);
	} else {
		finish_parity_scrub(rbio, 1);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);
}

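/*
 * Summary sketch of the validation above (my paraphrase, expressed as a
 * hypothetical helper; the real code branches directly):
 *
 *	static bool scrub_validation_ok(int dfail, int failp, int scrubp,
 *					int max_errors)
 *	{
 *		if (dfail > max_errors - 1)
 *			return false;
 *		if (dfail == 0)
 *			return true;
 *		return failp == scrubp;
 *	}
 *
 * "false" corresponds to the cleanup path.  Of the "true" cases, dfail == 0
 * means only parity is bad and finish_parity_scrub() rewrites it directly,
 * while dfail == 1 with failp == scrubp means the data stripe is rebuilt
 * from the other, still good parity via __raid_recover_end_io().  The
 * "- 1" exists because the parity being scrubbed cannot be trusted for
 * reconstruction.
 */
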
/*
 * End io for the read phase of the scrub rmw cycle.  All the bios here are
 * physical stripe bios we've read from the disk so we can recalculate the
 * parity of the stripe.
 *
 * This will usually kick off finish_parity_scrub once all the bios are read
 * in, but it may trigger parity reconstruction if we had any errors along
 * the way.
 */
static void raid56_parity_scrub_end_io(struct bio *bio)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;

	if (bio->bi_status)
		fail_bio_stripe(rbio, bio);
	else
		set_bio_pages_uptodate(bio);

	bio_put(bio);

	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	/*
	 * This will normally call finish_parity_scrub to start our write,
	 * but if there are any failed stripes we'll reconstruct from parity
	 * first.
	 */
	validate_rbio_for_parity_scrub(rbio);
}

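/*
 * The end_io above uses the usual "last one out turns off the lights"
 * completion pattern: stripes_pending is set to the number of bios
 * submitted, each completion decrements it, and only the bio that drops it
 * to zero runs the validation step.  A minimal sketch of that pattern
 * (nr_bios and all_bios_done() are hypothetical, not this driver's API):
 *
 *	atomic_t pending;
 *
 *	atomic_set(&pending, nr_bios);
 *	...
 *	if (atomic_dec_and_test(&pending))
 *		all_bios_done();
 */
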
static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_essential_pages(rbio);
	if (ret)
		goto cleanup;

	atomic_set(&rbio->error, 0);
	/*
	 * build a list of bios to read all the missing parts of this
	 * stripe
	 */
	for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
		for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
			struct page *page;
			/*
			 * we want to find all the pages missing from
			 * the rbio and read them from the disk. If
			 * page_in_rbio finds a page in the bio list
			 * we don't need to read it off the stripe.
			 */
			page = page_in_rbio(rbio, stripe, pagenr, 1);
			if (page)
				continue;

			page = rbio_stripe_page(rbio, stripe, pagenr);
			/*
			 * the bio cache may have handed us an uptodate
			 * page. If so, be happy and use it
			 */
			if (PageUptodate(page))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list, page,
					       stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * This can happen if others have merged with us; it means
		 * there is nothing left to read.  But if there are missing
		 * devices it may not be safe to do the full stripe write yet.
		 */
		goto finish;
	}

	/*
	 * the bbio may be freed once we submit the last bio. Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_private = rbio;
		bio->bi_end_io = raid56_parity_scrub_end_io;
		bio->bi_opf = REQ_OP_READ;

		btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

		submit_bio(bio);
	}
	/* the actual write will happen once the reads are done */
	return;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return;

finish:
	validate_rbio_for_parity_scrub(rbio);
}

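/*
 * Read-gathering sketch (illustrative only, with hypothetical helper
 * names): the loop above only issues reads for pages it cannot get any
 * other way.  Per (stripe, pagenr) the priority is roughly:
 *
 *	page = page_from_bio_list(stripe, pagenr);
 *	if (!page) {
 *		page = cached_stripe_page(stripe, pagenr);
 *		if (!PageUptodate(page))
 *			queue_read(page);
 *	}
 *
 * so pages handed to us by merged bios, and stripe pages already cached
 * as uptodate, never hit the disk again during the scrub read phase.
 */
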
static void scrub_parity_work(struct btrfs_work *work)
{
	struct btrfs_raid_bio *rbio;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	raid56_parity_scrub_stripe(rbio);
}

void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
	if (!lock_stripe_add(rbio))
		start_async_work(rbio, scrub_parity_work);
}

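/*
 * Submission pattern used above, and by the other raid56 entry points (my
 * reading, stated here for clarity): lock_stripe_add() returns non-zero
 * when another rbio for the same stripe already holds the hash lock, in
 * which case this rbio has been queued behind it and will be started when
 * that lock is released; only when it returns 0 do we kick the work item
 * ourselves.  Schematically:
 *
 *	if (!lock_stripe_add(rbio))
 *		start_async_work(rbio, scrub_parity_work);
 *	// else: the current lock holder will run us later
 */
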
/* The following code is used for dev replace of a missing RAID 5/6 device. */

struct btrfs_raid_bio *
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
			  struct btrfs_bio *bbio, u64 length)
{
	struct btrfs_raid_bio *rbio;

	rbio = alloc_rbio(fs_info, bbio, length);
	if (IS_ERR(rbio))
		return NULL;

	rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
	bio_list_add(&rbio->bio_list, bio);
	/*
	 * This is a special bio which is used to hold the completion handler
	 * and make this rbio look similar to the other rbio types.
	 */
	ASSERT(!bio->bi_iter.bi_size);

	rbio->faila = find_logical_bio_stripe(rbio, bio);
	if (rbio->faila == -1) {
		BUG();
		kfree(rbio);
		return NULL;
	}

	/*
	 * When we get bbio, we have already increased bio_counter, record it
	 * so the counter can be dropped at rbio_orig_end_io().
	 */
	rbio->generic_bio_cnt = 1;

	return rbio;
}

void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
	if (!lock_stripe_add(rbio))
		start_async_work(rbio, read_rebuild_work);
}
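
/*
 * Usage sketch for the missing-device rebuild API above (illustrative;
 * error handling and the surrounding scrub context are omitted, and the
 * pairing with raid56_add_scrub_pages() is my reading of how callers are
 * expected to attach the pages that should be rebuilt):
 *
 *	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
 *	if (!rbio)
 *		goto out;
 *	// attach each page to rebuild, e.g.:
 *	// raid56_add_scrub_pages(rbio, page, logical);
 *	raid56_submit_missing_rbio(rbio);
 *
 * The bio passed in must have a zero bi_size (see the ASSERT above); it
 * only carries the completion handler for the rebuild.
 */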