/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RAID1_H
#define _RAID1_H

/*
 * each barrier unit size is 64MB for now
 * note: it must be larger than RESYNC_DEPTH
 */
#define BARRIER_UNIT_SECTOR_BITS	17
#define BARRIER_UNIT_SECTOR_SIZE	(1<<17)
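/*
 * For example, with 512-byte sectors one barrier unit spans
 * (1<<17) * 512 bytes = 64MB, and the barrier unit a given sector
 * falls into is simply:
 *
 *	unit = sector >> BARRIER_UNIT_SECTOR_BITS;
 *
 * sector_to_idx() at the bottom of this file then hashes that unit
 * number into one of the BARRIER_BUCKETS_NR buckets defined below.
 */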
/*
 * In struct r1conf, the following members are related to I/O barrier
 * buckets:
 *	atomic_t	*nr_pending;
 *	atomic_t	*nr_waiting;
 *	atomic_t	*nr_queued;
 *	atomic_t	*barrier;
 * Each of them points to an array of atomic_t variables; each array is
 * designed to have BARRIER_BUCKETS_NR elements and to occupy a single
 * memory page. The data width of an atomic_t variable is 4 bytes, i.e.
 * 1<<(ilog2(sizeof(atomic_t))), so BARRIER_BUCKETS_NR_BITS is defined
 * as (PAGE_SHIFT - ilog2(sizeof(atomic_t))) to make sure an array of
 * atomic_t variables with BARRIER_BUCKETS_NR elements exactly occupies
 * a single memory page.
 */
#define BARRIER_BUCKETS_NR_BITS		(PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR		(1<<BARRIER_BUCKETS_NR_BITS)
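/*
 * On a system with 4KB pages this works out to 4096 / 4 = 1024 buckets,
 * so each of the four bucket arrays fits exactly in one page. A sketch
 * of how such an array might be allocated (the real allocation lives in
 * raid1.c):
 *
 *	conf->nr_pending = kcalloc(BARRIER_BUCKETS_NR,
 *				   sizeof(atomic_t), GFP_KERNEL);
 */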

/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
 * There are three safe ways to access raid1_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery is known to be happening - i.e. in code that is
 *    called as part of performing resync/recovery.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock - see the sketch below.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */
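/*
 * A minimal sketch of approach 3/ above (illustrative only; the real
 * users of this pattern are in raid1.c):
 *
 *	struct md_rdev *rdev;
 *
 *	rcu_read_lock();
 *	rdev = rcu_dereference(conf->mirrors[disk].rdev);
 *	if (rdev && !test_bit(Faulty, &rdev->flags))
 *		atomic_inc(&rdev->nr_pending);
 *	rcu_read_unlock();
 *	...
 *	if (rdev)
 *		rdev_dec_pending(rdev, conf->mddev);
 */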

struct raid1_info {
	struct md_rdev	*rdev;
	sector_t	head_position;

	/* When choosing the best device for a read (read_balance()),
	 * we try to keep sequential reads on the same device.
	 */
	sector_t	next_seq_sect;
	sector_t	seq_start;
};
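/*
 * Roughly how read_balance() uses the two fields above (a simplified
 * sketch, not the exact raid1.c logic): a read starting at
 * 'this_sector' counts as sequential on a device when it continues
 * where the previous read on that device ended, e.g.
 *
 *	if (mirror->next_seq_sect == this_sector)
 *		...	// sequential: prefer this device
 *
 * After the read is issued, next_seq_sect is advanced past the request
 * while seq_start remembers where the sequential run began.
 */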

/*
 * memory pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used, as it can change while a pool is active.
 * These two values are stored in a kmalloced struct.
 * The 'raid_disks' here is twice the raid_disks in r1conf.
 * This leaves room so that each 'real' device can have a replacement in
 * the second half of the array.
 */

struct pool_info {
	struct mddev *mddev;
	int	raid_disks;
};
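/*
 * For example, on a 2-device RAID1 array pool_info->raid_disks is 4:
 * slots 0-1 describe the 'real' devices and slots 2-3 their optional
 * replacements.
 */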

struct r1conf {
	struct mddev		*mddev;
	struct raid1_info	*mirrors;	/* twice 'raid_disks' to
						 * allow for replacements.
						 */
	int			raid_disks;

	spinlock_t		device_lock;

	/* list of 'struct r1bio' that need to be processed by raid1d,
	 * whether to retry a read, write out a resync or recovery
	 * block, or anything else.
	 */
	struct list_head	retry_list;
	/* A separate list of r1bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the
	 * superblock needs to be written.
	 */
	struct list_head	bio_end_io_list;

	/* queue pending writes to be submitted on unplug */
	struct bio_list		pending_bio_list;
	int			pending_count;

	/* for use when syncing mirrors:
	 * We don't allow both normal IO and resync/recovery IO at
	 * the same time - resync/recovery can only happen when there
	 * is no other IO. So when either is active, the other has to wait.
	 * See a more detailed description in raid1.c near raise_barrier().
	 */
	wait_queue_head_t	wait_barrier;
	spinlock_t		resync_lock;
	atomic_t		nr_sync_pending;
	atomic_t		*nr_pending;
	atomic_t		*nr_waiting;
	atomic_t		*nr_queued;
	atomic_t		*barrier;
	int			array_frozen;

	/* Set to 1 if a full sync is needed (e.g. a fresh device was added).
	 * Cleared when a sync completes.
	 */
	int			fullsync;

	/* When the same as mddev->recovery_disabled we don't allow
	 * recovery to be attempted as we expect a read error.
	 */
	int			recovery_disabled;

	/* poolinfo contains information about the content of the
	 * mempools - it changes when the array grows or shrinks.
	 */
	struct pool_info	*poolinfo;
	mempool_t		r1bio_pool;
	mempool_t		r1buf_pool;

	struct bio_set		bio_split;

	/* temporary buffer for synchronous IO when attempting to repair
	 * a read error.
	 */
	struct page		*tmppage;

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread	*thread;

	/* Keep track of cluster resync window to send to other
	 * nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;

};
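/*
 * Layout note (sketch): conf->mirrors[] has 2 * raid_disks entries.
 * For disk i, conf->mirrors[i].rdev is the active device and
 * conf->mirrors[i + conf->raid_disks].rdev, if non-NULL, is its
 * replacement, e.g.
 *
 *	struct md_rdev *repl = conf->mirrors[i + conf->raid_disks].rdev;
 */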

/*
 * this is our 'private' RAID1 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID1 operation, and about their status:
 */

struct r1bio {
	atomic_t		remaining;	/* 'have we finished' count,
						 * used from IRQ handlers
						 */
	atomic_t		behind_remaining; /* number of write-behind ios remaining
						   * in this BehindIO request
						   */
	sector_t		sector;
	int			sectors;
	unsigned long		state;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int			read_disk;

	struct list_head	retry_list;

	/*
	 * When R1BIO_BehindIO is set, we store pages for write behind
	 * in behind_master_bio.
	 */
	struct bio		*behind_master_bio;

	/*
	 * if the IO is in WRITE direction, then multiple bios are used.
	 * We choose the number when they are allocated.
	 */
	struct bio		*bios[];
	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced */
};
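/*
 * Because bios[] is a flexible array sized by the number of slots, an
 * r1bio is allocated in one piece. A sketch of the sizing arithmetic
 * (the real allocation goes through r1bio_pool in raid1.c):
 *
 *	size_t sz = offsetof(struct r1bio, bios[pi->raid_disks]);
 *	struct r1bio *r1_bio = kzalloc(sz, gfp_flags);
 */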

/* bits for r1bio.state */
enum r1bio_state {
	R1BIO_Uptodate,
	R1BIO_IsSync,
	R1BIO_Degraded,
	R1BIO_BehindIO,
	/* Set ReadError on bios that experience a read error so that
	 * raid1d knows what to do with them.
	 */
	R1BIO_ReadError,
	/* For write-behind requests, we call bi_end_io when the last
	 * non-write-behind device completes, provided any write was
	 * successful. Otherwise we call it when any write-behind write
	 * succeeds, or with failure when the last write completes (and
	 * all writes failed). Record that bi_end_io was called with
	 * this flag...
	 */
	R1BIO_Returned,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R1BIO_MadeGood,
	R1BIO_WriteError,
	R1BIO_FailFast,
};
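/*
 * The state bits above are manipulated with the usual bitops, e.g.
 * (sketch):
 *
 *	set_bit(R1BIO_Uptodate, &r1_bio->state);
 *	...
 *	if (test_bit(R1BIO_BehindIO, &r1_bio->state))
 *		...	// handle the write-behind case
 */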

static inline int sector_to_idx(sector_t sector)
{
	return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
			 BARRIER_BUCKETS_NR_BITS);
}
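/*
 * Typical use (sketch): map a bio's starting sector to its barrier
 * bucket before touching the per-bucket counters, e.g.
 *
 *	int idx = sector_to_idx(bio->bi_iter.bi_sector);
 *
 *	atomic_inc(&conf->nr_pending[idx]);
 */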
#endif