/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
   md.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

*/

#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST" because they don't make any promise about time lapse,
 * only about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because
 * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
 * seems to suggest that the errors it avoids retrying should usually
 * be retried.
 */
#define MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
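
/*
 * A minimal, illustrative sketch (not a definition used by this header):
 * personalities that honour the per-device FailFast flag, such as
 * raid1/raid10, apply MD_FAILFAST when building a bio for that device.
 * 'read_bio' is a hypothetical bio being prepared by the caller:
 *
 *	if (test_bit(FailFast, &rdev->flags))
 *		read_bio->bi_opf |= MD_FAILFAST;
 */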

/*
 * The struct embedded in rdev is used to serialize IO.
 */
struct serial_in_rdev {
	struct rb_root_cached serial_rb;
	spinlock_t serial_lock;
	wait_queue_head_t serial_io_wait;
};

/*
 * MD's 'extended' device
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512-byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page	*sb_page, *bb_page;
	int		sb_loaded;
	__u64		sb_events;
	sector_t	data_offset;	/* start of data in array */
	sector_t	new_data_offset;/* only relevant while reshaping */
	sector_t	sb_start;	/* offset of the super block (in 512-byte sectors) */
	int		sb_size;	/* bytes in the superblock */
	int		preferred_minor;	/* autorun support */

	struct kobject	kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:		faulty==1 in_sync==0
	 * Fully working:	faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array:	faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long	flags;	/* bit set of 'enum flag_bits' bits. */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t	nr_pending;	/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t	read_errors;	/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	time64_t	last_read_error;	/* monotonic time since our
						 * last read error
						 */
	atomic_t	corrected_errors; /* number of corrected read errors,
					   * for reporting to userspace and storing
					   * in superblock.
					   */

	struct serial_in_rdev *serial;	/* used for raid1 io serialization */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state'
					  * sysfs entry */
	/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_unack_badblocks;
	/* handle for 'bad_blocks' sysfs dentry */
	struct kernfs_node *sysfs_badblocks;
	struct badblocks badblocks;

	struct {
		short offset;	/* Offset from superblock to start of PPL.
				 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, either because it has
				 * reported some faults, or because
				 * of explicit request.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad-block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
	CollisionCheck,		/*
				 * check if there is collision between raid1
				 * serial bios.
				 */
};
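
/*
 * These bits live in rdev->flags and are manipulated with the regular
 * atomic bitops.  A minimal, illustrative sketch of a caller skipping
 * failed members and recording a write error:
 *
 *	if (test_bit(Faulty, &rdev->flags))
 *		continue;
 *	...
 *	set_bit(WriteErrorSeen, &rdev->flags);
 */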

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					 sectors,
					 first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
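
/*
 * Illustrative use of is_badblock(), a simplified sketch of the raid1-style
 * read path: probe the intended range and, if a recorded bad block starts
 * inside it, trim the request so it ends just before the bad range:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors,
 *			&first_bad, &bad_sectors) &&
 *	    first_bad > this_sector)
 *		sectors = first_bad - this_sector;
 */
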
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, so do
				 * not open it */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
				 * must not report that array is ready yet
				 */
	MD_BROKEN,		/* This is used in RAID-0/LINEAR only, to stop
				 * I/O in case an array member is gone/failed.
				 */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
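
/*
 * A minimal, illustrative sketch: code that changes device state usually
 * marks the superblock dirty via sb_flags and wakes the management thread
 * so md_update_sb() gets a chance to run:
 *
 *	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 *	md_wakeup_thread(mddev->thread);
 */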

#define NR_SERIAL_INFOS		8
/* record current range of serialize IOs */
struct serial_info {
	struct rb_node node;
	sector_t start;		/* start sector of rb node */
	sector_t last;		/* end sector of rb node */
	sector_t _subtree_last;	/* highest sector in subtree of rb node */
};

struct mddev {
	void				*private;
	struct md_personality		*pers;
	dev_t				unit;
	int				md_minor;
	struct list_head		disks;
	unsigned long			flags;
	unsigned long			sb_flags;

	int				suspended;
	atomic_t			active_io;
	int				ro;
	int				sysfs_active; /* set when sysfs deletes
						       * are happening, so run/
						       * takeover/stop are not safe
						       */
	struct gendisk			*gendisk;

	struct kobject			kobj;
	int				hold_active;
#define	UNTIL_IOCTL	1
#define	UNTIL_STOP	2

	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;
	int				external;	/* metadata is
							 * managed externally */
	char				metadata_type[17]; /* externally set */
	int				chunk_sectors;
	time64_t			ctime, utime;
	int				level, layout;
	char				clevel[16];
	int				raid_disks;
	int				max_disks;
	sector_t			dev_sectors;	/* used size of
							 * component devices */
	sector_t			array_sectors;	/* exported array size */
	int				external_size;	/* size managed
							 * externally */
	__u64				events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int				can_decrease_events;

	char				uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t			reshape_position;
	int				delta_disks, new_level, new_layout;
	int				new_chunk_sectors;
	int				reshape_backwards;

	struct md_thread		*thread;	/* management thread */
	struct md_thread		*sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e. "data-check", "requested-resync", "resync",
	 * "recovery", or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is begun.
	 */
	char				*last_sync_action;
	sector_t			curr_resync;	/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t			curr_resync_completed;
	unsigned long			resync_mark;	/* a recent timestamp */
	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
	sector_t			curr_mark_cnt;	/* blocks scheduled now */

	sector_t			resync_max_sectors; /* may be set by personality */

	atomic64_t			resync_mismatches; /* count of sectors where
							    * parity/replica mismatch found
							    */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t			suspend_lo;
	sector_t			suspend_hi;
	/* if zero, use the system-wide default */
	int				sync_speed_min;
	int				sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int				parallel_resync;

	int				ok_start_degraded;

	unsigned long			recovery;
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int				recovery_disabled;

	int				in_sync;	/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 * Lock ordering is:
	 *  reconfig_mutex -> bd_mutex
	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
	 */
	struct mutex			open_mutex;
	struct mutex			reconfig_mutex;
	atomic_t			active;		/* general refcount */
	atomic_t			openers;	/* number of active opens */

	int				changed;	/* True if we might need to
							 * reread partition info */
	int				degraded;	/* whether md should consider
							 * adding a spare
							 */

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;
	sector_t			resync_min;	/* user requested sync
							 * starts here */
	sector_t			resync_max;	/* resync should pause
							 * when it gets here */

	struct kernfs_node		*sysfs_state;	/* handle for 'array_state'
							 * file in sysfs.
							 */
	struct kernfs_node		*sysfs_action;	/* handle for 'sync_action' */
	struct kernfs_node		*sysfs_completed;	/* handle for 'sync_completed' */
	struct kernfs_node		*sysfs_degraded;	/* handle for 'degraded' */
	struct kernfs_node		*sysfs_level;		/* handle for 'level' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_CHANGE_*
	 *   in_sync - and related safemode and MD_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t			lock;
	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
	atomic_t			pending_writes;	/* number of active superblock writes */

	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending.
							 */
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	struct percpu_ref		writes_pending;
	int				sync_checkers;	/* # of threads checking writes_pending */
	struct request_queue		*queue;	/* for plugging ... */

	struct bitmap			*bitmap; /* the bitmap for the device */
	struct {
		struct file		*file; /* the bitmap file */
		loff_t			offset; /* offset from superblock of
						 * start of bitmap. May be
						 * negative, but not '0'
						 * For external metadata, offset
						 * from start of device.
						 */
		unsigned long		space; /* space available at this offset */
		loff_t			default_offset; /* this is the offset to use when
							 * hot-adding a bitmap.  It should
							 * eventually be settable by sysfs.
							 */
		unsigned long		default_space; /* space available at
							* default offset */
		struct mutex		mutex;
		unsigned long		chunksize;
		unsigned long		daemon_sleep; /* how many jiffies between updates? */
		unsigned long		max_write_behind; /* write-behind mode */
		int			external;
		int			nodes; /* Maximum number of nodes in the cluster */
		char			cluster_name[64]; /* Name of the cluster */
	} bitmap_info;

	atomic_t			max_corr_read_errors; /* max read retries */
	struct list_head		all_mddevs;

	struct attribute_group		*to_remove;

	struct bio_set			bio_set;
	struct bio_set			sync_set; /* for sync operations like
						   * metadata and bitmap writes
						   */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	ktime_t start_flush, last_flush; /* last_flush is when the last completed
					  * flush was started.
					  */
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	mempool_t *serial_info_pool;
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info		*cluster_info;
	unsigned int			good_device_nr;	/* good device num within cluster raid */
	unsigned int			noio_flag; /* for memalloc scope API */

	bool	has_superblocks:1;
	bool	fail_last_dev:1;
	bool	serialize_policy:1;
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE is set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because of an I/O error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
};
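
/*
 * A minimal, illustrative sketch: to ask the management thread to consider
 * starting a resync/recovery, set MD_RECOVERY_NEEDED and wake it up (an
 * in-flight sync is aborted by setting MD_RECOVERY_INTR instead):
 *
 *	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 *	md_wakeup_thread(mddev->thread);
 */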

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
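
/*
 * Illustrative locking sketch, the pattern typically used by sysfs store
 * handlers and ioctls (the reconfiguration work itself is elided):
 *
 *	int err = mddev_lock(mddev);
 *
 *	if (err)
 *		return err;
 *	... reconfigure the array ...
 *	mddev_unlock(mddev);
 */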

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bio->bi_disk->sync_io);
}

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * Start-up work that does NOT require an md_thread.  Tasks that
	 * require an md_thread should go into start().
	 */
	int (*run)(struct mddev *mddev);
	/* start-up work that requires md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
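
/*
 * A minimal, illustrative sketch of defining an entry; 'foo' and its
 * show/store handlers are hypothetical names:
 *
 *	static struct md_sysfs_entry md_foo =
 *	__ATTR(foo, S_IRUGO|S_IWUSR, foo_show, foo_store);
 */
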
extern struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}

static inline char *mdname(struct mddev *mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)				\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)				\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)				\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
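
/*
 * A minimal, illustrative sketch of the iterators: count the in-sync
 * members of an array.  The _rcu variant must run under rcu_read_lock():
 *
 *	struct md_rdev *rdev;
 *	int cnt = 0;
 *
 *	rdev_for_each(rdev, mddev)
 *		if (test_bit(In_sync, &rdev->flags))
 *			cnt++;
 */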

struct md_thread {
	void			(*run) (struct md_thread *thread);
	struct mddev		*mddev;
	wait_queue_head_t	wqueue;
	unsigned long		flags;
	struct task_struct	*tsk;
	unsigned long		timeout;
	void			*private;
};

#define THREAD_WAKEUP  0

static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);

extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
				   struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				     bool is_suspend);
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
				      bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
{
	int flags = rdev->bdev->bd_disk->flags;

	if (!(flags & GENHD_FL_UP)) {
		if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
			pr_warn("md: %s: %s array has a missing/failed member\n",
				mdname(rdev->mddev), md_type);
		return true;
	}
	return false;
}

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
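
/*
 * A minimal, illustrative sketch: nr_pending is raised before I/O is
 * issued to a device and dropped again on completion, so a faulty device
 * is only acted on once all of its outstanding I/O has drained:
 *
 *	atomic_inc(&rdev->nr_pending);
 *	... submit the bio ...
 *	rdev_dec_pending(rdev, mddev);
 */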

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_SAME &&
	    !bio->bi_disk->queue->limits.max_write_same_sectors)
		mddev->queue->limits.max_write_same_sectors = 0;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}

struct mdu_array_info_s;
struct mdu_disk_info_s;

extern int mdp_major;
void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);

extern const struct block_device_operations md_fops;

#endif /* _MD_MD_H */