// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO. All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 * - In case an unrepairable extent is encountered, track which files are
 *   affected and report them
 * - track and record media errors, throw out bad devices
 * - add a mode to also read unallocated space
 */

struct scrub_block;
struct scrub_ctx;

/*
 * The following three values only influence performance.
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */
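/*
 * With the defaults above, up to SCRUB_BIOS_PER_SCTX * SCRUB_PAGES_PER_RD_BIO
 * pages can be in flight per device, i.e. 64 * 32 * 4KiB = 8MiB (assuming
 * 4KiB pages).
 */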

/*
 * the following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 * Values larger than BTRFS_STRIPE_LEN are not supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */

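/*
 * Refcounted container for the mapping information (bbio and mapped length)
 * used when a block is re-read from its mirrors; shared by the pages that
 * reference it and freed when the last reference is dropped (see
 * scrub_put_recover()).
 */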
struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;	/* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/*
		 * The following is for the data used to check parity.
		 * It is for data with a checksum.
		 */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

/* Used for the chunks with parity stripes such as RAID5/6 */
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happened when
	 * reading or checking that data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[];
};

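/*
 * Context for a scrub run on one device: the pool of scrub_bios used for
 * reads, the write path state used during device replace, and the accumulated
 * statistics.
 */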
struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * doesn't free the scrub context before or while the workers are
	 * doing the wakeup() call.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

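/*
 * One node in the per-block-group tree of full stripe locks. The mutex
 * serializes repair of a RAID56 full stripe against other scrubbing readers;
 * refs counts how many holders currently have the stripe locked.
 */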
struct full_stripe_lock {
	struct rb_node	node;
	u64		logical;
	u64		refs;
	struct mutex	mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

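/*
 * Track bios in flight for this context. The paired inc/dec also holds a
 * reference on the scrub context, so it cannot be freed while a completing
 * bio may still wake up waiters on list_wait.
 */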
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

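/*
 * Mark this scrub as paused (scrub_pause_on) and, on the off side, wait until
 * any pending pause request is gone before resuming. The wake_up calls notify
 * whoever is waiting for all scrubs to reach the paused state.
 */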
static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}

/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
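	/*
	 * For example (assuming the default 64KiB stripe length and a RAID5
	 * chunk with two data stripes): full_stripe_len is 128KiB, so a
	 * bytenr 300KiB into the block group maps to the full stripe starting
	 * at cache->start + 256KiB.
	 */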
	ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
			cache->full_stripe_len + cache->start;
	return ret;
}

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked the full stripe covering @bytenr, with a mutex held.
 * The caller must call unlock_full_stripe() in the same context.
 *
 * Return <0 on error.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context as the one that called the
 * corresponding lock_full_stripe().
 *
 * Return 0 if we unlock the full stripe without problem.
 * Return <0 for error
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

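/* Drop all checksums still queued on the context's csum_list. */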
static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

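/*
 * Tear down a scrub context: release the block references of a partially
 * filled current bio (if any), free all scrub_bios, queued checksums, the
 * pending write bio and finally the context itself.
 */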
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

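/* Drop a reference on the scrub context; free it when the last one is gone. */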
static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

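/*
 * Allocate and initialize a scrub context for one device: pre-allocate the
 * pool of scrub_bios, chain them on the free list and, for device replace,
 * set up the write path to the replace target device.
 */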
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
				NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

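/*
 * Callback for iterate_extent_inodes(): resolve every path of the inode that
 * references the corrupted extent and print one warning line per path.
 */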
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key key;

	local_root = btrfs_get_fs_root(fs_info, root, true);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_put_root(local_root);
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
					struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub
	 * uses GFP_NOFS in this context, so we keep it consistent but it does
	 * not seem to be strictly necessary.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		btrfs_put_root(local_root);
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * We deliberately ignore the fact that ipath might have been too small
	 * to hold all of the paths here.
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	btrfs_put_root(local_root);
	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

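/*
 * Report what is affected by a corrupted extent: for tree blocks, walk the
 * extent's backrefs and print one warning per referencing tree; for data,
 * iterate the referencing inodes and print the affected file paths.
 */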
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) struct scrub_ctx *sctx = sblock_to_check->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) struct btrfs_fs_info *fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) unsigned int failed_mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) unsigned int is_metadata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) unsigned int have_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) struct scrub_block *sblock_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) int mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) int page_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) int success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) bool full_stripe_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) unsigned int nofs_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) DEFAULT_RATELIMIT_BURST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) BUG_ON(sblock_to_check->page_count < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * If we find an error in a super block, just report it. Super blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * get written out again with the next transaction commit anyway, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * no repair is attempted here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) ++sctx->stat.super_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) logical = sblock_to_check->pagev[0]->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) is_metadata = !(sblock_to_check->pagev[0]->flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) BTRFS_EXTENT_FLAG_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) have_csum = sblock_to_check->pagev[0]->have_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) dev = sblock_to_check->pagev[0]->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * We must use GFP_NOFS because the scrub task might be waiting for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * worker task executing this function and in turn a transaction commit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * might be waiting for the scrub task to pause (which needs to wait for all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) * the worker tasks to complete before pausing).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * We do allocations in the workers through insert_full_stripe_lock()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * and scrub_add_page_to_wr_bio(), which happens down the call chain of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) * this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) nofs_flag = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * For RAID5/6, a race can happen between the scrub threads of different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) * devices. In case of data corruption, the parity and the data scrub
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) * threads will both try to recover the data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * Such a race can lead to doubly counted csum errors, or even to an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * unrecoverable error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (ret == -ENOMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) sctx->stat.read_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) * Read all mirrors one after the other. This includes re-reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * the extent or metadata block that failed (which is what caused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) * this fixup code to be called), this time page by page, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) * order to know which pages caused I/O errors and which ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * are good (for all mirrors).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * The goal is to handle the situation where more than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) * mirror contains I/O errors, but the errors do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * overlap, i.e. the data can be repaired by selecting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) * pages from those mirrors without an I/O error on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) * would be that mirror #1 has an I/O error on the first page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) * the second page is good, and mirror #2 has an I/O error on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * the second page, but the first page is good.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) * Then the first page of the first mirror can be repaired by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) * taking the first page of the second mirror, and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * second page of the second mirror can be repaired by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * copying the contents of the 2nd page of the 1st mirror.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) * One more note: if the pages of one mirror contain I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * errors, the checksum cannot be verified. In order to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * the best data for repairing, the first attempt is to find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) * a mirror without I/O errors and with a validated checksum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * Only if this is not possible are the pages picked from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * mirrors with I/O errors, without considering the checksum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * In the latter case, the checksum of the repaired area is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * verified at the end in order to correctly maintain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * the statistics.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) sizeof(*sblocks_for_recheck), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) if (!sblocks_for_recheck) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) sctx->stat.read_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) /* setup the context, map the logical blocks and alloc the pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) sctx->stat.read_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) sblock_bad = sblocks_for_recheck + failed_mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) /* build and submit the bios for the failed mirror, check checksums */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) scrub_recheck_block(fs_info, sblock_bad, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) sblock_bad->no_io_error_seen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) * the error disappeared after reading page by page, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) * the area was part of a huge bio and other parts of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) * bio caused I/O errors, or the block layer merged several
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * read requests into one and the error is caused by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * different bio (usually one of the two latter cases is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * the cause)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) sctx->stat.unverified_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) sblock_to_check->data_corrected = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (sctx->is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) scrub_write_block_to_dev_replace(sblock_bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (!sblock_bad->no_io_error_seen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) sctx->stat.read_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) if (__ratelimit(&rs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) scrub_print_warning("i/o error", sblock_to_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) } else if (sblock_bad->checksum_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) sctx->stat.csum_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (__ratelimit(&rs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) scrub_print_warning("checksum error", sblock_to_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) BTRFS_DEV_STAT_CORRUPTION_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) } else if (sblock_bad->header_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) sctx->stat.verify_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (__ratelimit(&rs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) scrub_print_warning("checksum/header error",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) sblock_to_check);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (sblock_bad->generation_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) BTRFS_DEV_STAT_GENERATION_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) BTRFS_DEV_STAT_CORRUPTION_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) if (sctx->readonly) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) ASSERT(!sctx->is_dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * now build and submit the bios for the other mirrors, check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * checksums.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * First try to pick the mirror which is completely without I/O
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * errors and also does not have a checksum error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * If one is found, and if a checksum is present, the full block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * that is known to contain an error is rewritten. Afterwards
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) * the block is known to be corrected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * If a mirror is found which is completely correct, and no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * checksum is present, only those pages are rewritten that had
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) * an I/O error in the block to be repaired, since it cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * determined which copy of the other pages is better (and it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * could happen otherwise that a correct page would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) * overwritten by a bad one).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) for (mirror_index = 0; ;mirror_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) struct scrub_block *sblock_other;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (mirror_index == failed_mirror_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (mirror_index >= BTRFS_MAX_MIRRORS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) if (!sblocks_for_recheck[mirror_index].page_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) sblock_other = sblocks_for_recheck + mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) struct scrub_recover *r = sblock_bad->pagev[0]->recover;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) int max_allowed = r->bbio->num_stripes -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) r->bbio->num_tgtdevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) if (mirror_index >= max_allowed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (!sblocks_for_recheck[1].page_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) ASSERT(failed_mirror_index == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) sblock_other = sblocks_for_recheck + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /* build and submit the bios, check checksums */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) scrub_recheck_block(fs_info, sblock_other, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (!sblock_other->header_error &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) !sblock_other->checksum_error &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) sblock_other->no_io_error_seen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (sctx->is_dev_replace) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) scrub_write_block_to_dev_replace(sblock_other);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) goto corrected_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) ret = scrub_repair_block_from_good_copy(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) sblock_bad, sblock_other);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) goto corrected_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) goto did_not_correct_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * In case of I/O errors in the area that is supposed to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * repaired, continue by picking good copies of those pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * Select the good pages from mirrors to rewrite bad pages from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * the area to fix. Afterwards verify the checksum of the block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * that is supposed to be repaired. This verification step is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * only done for the purpose of statistics counting and for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * final scrub report, which states whether errors remain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * A perfect algorithm could make use of the checksum and try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) * all possible combinations of pages from the different mirrors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * until the checksum verification succeeds. For example, when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * of mirror #2 is readable but the final checksum test fails,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * then the 2nd page of mirror #3 could be tried to see whether
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * the final checksum then succeeds. But this would be a rare
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * exception and is therefore not implemented. At least it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * avoided that the good copy is overwritten.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * A more useful improvement would be to pick the sectors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * without I/O error based on sector sizes (512 bytes on legacy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * disks) instead of on PAGE_SIZE. Then maybe 512 bytes of one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * mirror could be repaired by taking 512 bytes of a different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * mirror, even if other 512 byte sectors in the same PAGE_SIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * area are unreadable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) success = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) for (page_num = 0; page_num < sblock_bad->page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) page_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) struct scrub_page *page_bad = sblock_bad->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) struct scrub_block *sblock_other = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) /* Skip pages without I/O error, unless this is a dev replace. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (!page_bad->io_error && !sctx->is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * In case of dev replace, if the raid56 rebuild process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * did not produce correct data, then copy the content
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * of sblock_bad to make sure the target device is identical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * to the source device, instead of writing garbage data from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * the sblock_for_recheck array to the target device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) sblock_other = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) } else if (page_bad->io_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) /* try to find no-io-error page in mirrors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) for (mirror_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) mirror_index < BTRFS_MAX_MIRRORS &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) sblocks_for_recheck[mirror_index].page_count > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) mirror_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) if (!sblocks_for_recheck[mirror_index].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) pagev[page_num]->io_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) sblock_other = sblocks_for_recheck +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (!sblock_other)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) success = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (sctx->is_dev_replace) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * We did not find a mirror to fetch the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * from. scrub_write_page_to_dev_replace()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * handles this case (page->io_error) by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * filling the page with zeros before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * submitting the write request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) if (!sblock_other)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) sblock_other = sblock_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) if (scrub_write_page_to_dev_replace(sblock_other,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) page_num) != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) atomic64_inc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) &fs_info->dev_replace.num_write_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) success = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) } else if (sblock_other) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) ret = scrub_repair_page_from_good_copy(sblock_bad,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) sblock_other,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) page_num, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) page_bad->io_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) success = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) if (success && !sctx->is_dev_replace) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) if (is_metadata || have_csum) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * need to verify the checksum now that all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) * sectors on disk are repaired (the write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) * request for data to be repaired is on its way).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) * Just be lazy and use scrub_recheck_block()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * which re-reads the data before the checksum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * is verified, but most likely the data comes out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * of the page cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) scrub_recheck_block(fs_info, sblock_bad, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (!sblock_bad->header_error &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) !sblock_bad->checksum_error &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) sblock_bad->no_io_error_seen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) goto corrected_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) goto did_not_correct_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) corrected_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) sctx->stat.corrected_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) sblock_to_check->data_corrected = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) btrfs_err_rl_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) "fixed up error at logical %llu on dev %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) logical, rcu_str_deref(dev->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) did_not_correct_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) btrfs_err_rl_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) "unable to fixup (regular) error at logical %llu on dev %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) logical, rcu_str_deref(dev->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (sblocks_for_recheck) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) mirror_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) struct scrub_block *sblock = sblocks_for_recheck +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) struct scrub_recover *recover;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) int page_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) for (page_index = 0; page_index < sblock->page_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) page_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) sblock->pagev[page_index]->sblock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) recover = sblock->pagev[page_index]->recover;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) if (recover) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) scrub_put_recover(fs_info, recover);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) sblock->pagev[page_index]->recover =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) scrub_page_put(sblock->pagev[page_index]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) kfree(sblocks_for_recheck);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
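/*
 * Number of copies that can be tried for a block. RAID5 counts as two
 * "mirrors" and RAID6 as three (the stripe itself plus the rebuilds that
 * involve the parity stripe(s)); for all other profiles every stripe
 * returned by the block mapping is a full copy.
 */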
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) return 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) return (int)bbio->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
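/*
 * Map a logical address to the index of the bbio stripe that contains it and
 * the offset into that stripe. For RAID5/6 the raid_map is searched (the P
 * and Q stripes are skipped); for all other profiles the requested mirror
 * number is used directly as the stripe index.
 */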
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) u64 *raid_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) u64 mapped_length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) int nstripes, int mirror,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) int *stripe_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) u64 *stripe_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) /* RAID5/6 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) for (i = 0; i < nstripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (raid_map[i] == RAID6_Q_STRIPE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) raid_map[i] == RAID5_P_STRIPE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) if (logical >= raid_map[i] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) logical < raid_map[i] + mapped_length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) *stripe_index = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) *stripe_offset = logical - raid_map[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) /* The other RAID type */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) *stripe_index = mirror;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) *stripe_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
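/*
 * Build one scrub_block per mirror for the block that failed. The original
 * block is mapped page by page with BTRFS_MAP_GET_READ_MIRRORS, a fresh page
 * is allocated for every mirror, and each page keeps a reference to a
 * scrub_recover that holds the bbio (and the bio counter) until the recheck
 * pages are released.
 */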
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) struct scrub_block *sblocks_for_recheck)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) struct scrub_ctx *sctx = original_sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) u64 length = original_sblock->page_count * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) u64 logical = original_sblock->pagev[0]->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) u64 generation = original_sblock->pagev[0]->generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) u64 flags = original_sblock->pagev[0]->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) u64 have_csum = original_sblock->pagev[0]->have_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct scrub_recover *recover;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct btrfs_bio *bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) u64 sublen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) u64 mapped_length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) u64 stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) int stripe_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) int page_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) int mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) int nmirrors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) * note: the two members refs and outstanding_pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) * are not used (and not set) in the blocks that are used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) * the recheck procedure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) while (length > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) sublen = min_t(u64, length, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) mapped_length = sublen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * with a length of PAGE_SIZE, each returned stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * represents one mirror
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) btrfs_bio_counter_inc_blocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) logical, &mapped_length, &bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) if (ret || !bbio || mapped_length < sublen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) if (!recover) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) refcount_set(&recover->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) recover->bbio = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) recover->map_length = mapped_length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) for (mirror_index = 0; mirror_index < nmirrors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) mirror_index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) struct scrub_block *sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) struct scrub_page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) sblock = sblocks_for_recheck + mirror_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) sblock->sctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) page = kzalloc(sizeof(*page), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) leave_nomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) scrub_put_recover(fs_info, recover);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) scrub_page_get(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) sblock->pagev[page_index] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) page->sblock = sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) page->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) page->generation = generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) page->logical = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) page->have_csum = have_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (have_csum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) memcpy(page->csum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) original_sblock->pagev[0]->csum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) sctx->csum_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) scrub_stripe_index_and_offset(logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) bbio->map_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) bbio->raid_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) mapped_length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) bbio->num_stripes -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) bbio->num_tgtdevs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) mirror_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) &stripe_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) &stripe_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) page->physical = bbio->stripes[stripe_index].physical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) page->dev = bbio->stripes[stripe_index].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) BUG_ON(page_index >= original_sblock->page_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) page->physical_for_dev_replace =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) original_sblock->pagev[page_index]->
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) physical_for_dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) /* for missing devices, dev->bdev is NULL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) page->mirror_num = mirror_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) sblock->page_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) page->page = alloc_page(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) if (!page->page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) goto leave_nomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) scrub_get_recover(recover);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) page->recover = recover;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) scrub_put_recover(fs_info, recover);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) length -= sublen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) logical += sublen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) page_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) static void scrub_bio_wait_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) complete(bio->bi_private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)
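/*
 * Submit a read that reconstructs the data of one RAID5/6 full stripe via
 * the parity recover path and wait for it to complete. The mirror number of
 * the block's first page selects which variant of the rebuild is used.
 */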
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) struct scrub_page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) DECLARE_COMPLETION_ONSTACK(done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) int mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) bio->bi_iter.bi_sector = page->logical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) bio->bi_private = &done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) bio->bi_end_io = scrub_bio_wait_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) mirror_num = page->sblock->pagev[0]->mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) page->recover->map_length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) mirror_num, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) wait_for_completion_io(&done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) return blk_status_to_errno(bio->bi_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)
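/*
 * Re-read a whole scrub_block through the RAID5/6 recovery path using a
 * single bio. If the device is missing or the rebuild fails, every page of
 * the block is marked with an I/O error; otherwise the checksum of the
 * re-read data is verified.
 */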
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) struct scrub_page *first_page = sblock->pagev[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) int page_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) /* All pages in sblock belong to the same stripe on the same device. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) ASSERT(first_page->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (!first_page->dev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) bio_set_dev(bio, first_page->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) for (page_num = 0; page_num < sblock->page_count; page_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) struct scrub_page *page = sblock->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) WARN_ON(!page->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) bio_add_page(bio, page->page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) scrub_recheck_block_checksum(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) for (page_num = 0; page_num < sblock->page_count; page_num++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) sblock->pagev[page_num]->io_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) sblock->no_io_error_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * this function will check the on disk data for checksum errors, header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * errors and read I/O errors. If any I/O errors happen, the exact pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * which are errored are marked as being bad. The goal is to enable scrub
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) * to take those pages that are not errored from all the mirrors so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) * the pages that are errored in the just handled mirror can be repaired.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) struct scrub_block *sblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) int retry_failed_mirror)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) int page_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) sblock->no_io_error_seen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) /* short cut for raid56 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) return scrub_recheck_block_on_raid56(fs_info, sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) for (page_num = 0; page_num < sblock->page_count; page_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) struct scrub_page *page = sblock->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) if (page->dev->bdev == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) page->io_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) sblock->no_io_error_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) WARN_ON(!page->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) bio = btrfs_io_bio_alloc(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) bio_set_dev(bio, page->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) bio_add_page(bio, page->page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) bio->bi_iter.bi_sector = page->physical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) bio->bi_opf = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (btrfsic_submit_bio_wait(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) page->io_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) sblock->no_io_error_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) if (sblock->no_io_error_seen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) scrub_recheck_block_checksum(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
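/* Return 1 if the fsid found in an on-disk header matches the device's fs_devices. */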
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) static inline int scrub_check_fsid(u8 fsid[],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) struct scrub_page *spage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) return !ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
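/* Clear the error flags of a block and verify its data or tree block checksum. */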
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) static void scrub_recheck_block_checksum(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) sblock->header_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) sblock->checksum_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) sblock->generation_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) scrub_checksum_data(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) scrub_checksum_tree_block(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)
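/*
 * Copy every page of a good mirror over the matching page of the bad block
 * (force_write is set, so pages are rewritten even without an individual I/O
 * error). The last non-zero per-page result, if any, is returned.
 */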
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) struct scrub_block *sblock_good)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) int page_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) int ret_sub;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) sblock_good,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) page_num, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) if (ret_sub)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) ret = ret_sub;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546)
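/*
 * Rewrite a single page of the bad mirror with the data from the good mirror.
 * The write is only issued if force_write is set or if the bad block has a
 * header/checksum error or the page itself saw an I/O error.
 */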
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) struct scrub_block *sblock_good,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) int page_num, int force_write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) struct scrub_page *page_bad = sblock_bad->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) struct scrub_page *page_good = sblock_good->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) BUG_ON(page_bad->page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) BUG_ON(page_good->page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (force_write || sblock_bad->header_error ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) sblock_bad->checksum_error || page_bad->io_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (!page_bad->dev->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) btrfs_warn_rl(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) bio = btrfs_io_bio_alloc(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) bio_set_dev(bio, page_bad->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) bio->bi_iter.bi_sector = page_bad->physical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) bio->bi_opf = REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) if (PAGE_SIZE != ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) if (btrfsic_submit_bio_wait(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) btrfs_dev_stat_inc_and_print(page_bad->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) BTRFS_DEV_STAT_WRITE_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) atomic64_inc(&fs_info->dev_replace.num_write_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) int page_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) * This block is only used to check the parity on the source device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) * so its data does not need to be written to the destination device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) if (sblock->sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) for (page_num = 0; page_num < sblock->page_count; page_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) ret = scrub_write_page_to_dev_replace(sblock, page_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) atomic64_inc(&fs_info->dev_replace.num_write_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
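/*
 * Queue one page of @sblock for writing to the dev-replace target.  Pages
 * that could not be read are zeroed first, so no stale data ends up on
 * the new device.
 */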
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) int page_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) struct scrub_page *spage = sblock->pagev[page_num];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) BUG_ON(spage->page == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) if (spage->io_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) clear_page(page_address(spage->page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) return scrub_add_page_to_wr_bio(sblock->sctx, spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)
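/*
 * Append @spage to the write bio that is currently being built for the
 * dev-replace target.  A new scrub_bio is allocated if none is pending,
 * and the pending bio is submitted first whenever the page is not
 * physically or logically contiguous with it, or once it is full.
 */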
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) struct scrub_page *spage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) struct scrub_bio *sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (!sctx->wr_curr_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (!sctx->wr_curr_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) sctx->wr_curr_bio->sctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) sctx->wr_curr_bio->page_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) sbio = sctx->wr_curr_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (sbio->page_count == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) sbio->physical = spage->physical_for_dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) sbio->logical = spage->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) sbio->dev = sctx->wr_tgtdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) bio = sbio->bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) if (!bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) sbio->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) bio->bi_private = sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) bio->bi_end_io = scrub_wr_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) bio_set_dev(bio, sbio->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) bio->bi_iter.bi_sector = sbio->physical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) bio->bi_opf = REQ_OP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) sbio->status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) spage->physical_for_dev_replace ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) sbio->logical + sbio->page_count * PAGE_SIZE !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) spage->logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) if (ret != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) if (sbio->page_count < 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) bio_put(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) sbio->bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) sbio->pagev[sbio->page_count] = spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) scrub_page_get(spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) sbio->page_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) if (sbio->page_count == sctx->pages_per_wr_bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691)
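/* Submit the pending dev-replace write bio, if any.  Callers hold wr_lock. */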
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) static void scrub_wr_submit(struct scrub_ctx *sctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) struct scrub_bio *sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) if (!sctx->wr_curr_bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) sbio = sctx->wr_curr_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) sctx->wr_curr_bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) WARN_ON(!sbio->bio->bi_disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) scrub_pending_bio_inc(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /* Process all writes in a single worker thread. Then the block layer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) * orders the requests before sending them to the driver, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) * doubled the write performance on spinning disks when measured
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) * with Linux 3.5. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) btrfsic_submit_bio(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) static void scrub_wr_bio_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) struct scrub_bio *sbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) sbio->status = bio->bi_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) sbio->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) struct scrub_ctx *sctx = sbio->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) if (sbio->status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) struct btrfs_dev_replace *dev_replace =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) &sbio->sctx->fs_info->dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) for (i = 0; i < sbio->page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) struct scrub_page *spage = sbio->pagev[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) spage->io_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) atomic64_inc(&dev_replace->num_write_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) for (i = 0; i < sbio->page_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) scrub_page_put(sbio->pagev[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) bio_put(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) kfree(sbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) scrub_pending_bio_dec(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
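/*
 * Verify the checksum (and, for metadata, the header fields) of a block
 * that was read without I/O errors.  A failed verification kicks off
 * repair via scrub_handle_errored_block().  Returns non-zero if the block
 * is bad; super block errors are only counted, never repaired here.
 */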
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) static int scrub_checksum(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) u64 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) * No need to initialize these stats currently, because this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) * function only uses the return value instead of these stat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) * values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) * Todo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) * always use stats
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) sblock->header_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) sblock->generation_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) sblock->checksum_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) WARN_ON(sblock->page_count < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) flags = sblock->pagev[0]->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (flags & BTRFS_EXTENT_FLAG_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) ret = scrub_checksum_data(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) ret = scrub_checksum_tree_block(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) else if (flags & BTRFS_EXTENT_FLAG_SUPER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) (void)scrub_checksum_super(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) scrub_handle_errored_block(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)
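/*
 * Check the data checksum of the first page of @sblock against the value
 * recorded in the checksum tree.  Returns 1 (and sets checksum_error) on
 * a mismatch, 0 if the page has no csum or the csum matches.
 */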
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) static int scrub_checksum_data(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) struct scrub_ctx *sctx = sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) u8 csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) struct scrub_page *spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) char *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) BUG_ON(sblock->page_count < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) spage = sblock->pagev[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) if (!spage->have_csum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) kaddr = page_address(spage->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) shash->tfm = fs_info->csum_shash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) crypto_shash_init(shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (memcmp(csum, spage->csum, sctx->csum_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) sblock->checksum_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) return sblock->checksum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808)
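/*
 * Validate a tree block: bytenr, generation, fsid and chunk tree uuid
 * from the header, plus the checksum computed over the whole node.
 * Returns non-zero if a header or checksum error was detected.
 */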
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) static int scrub_checksum_tree_block(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) struct scrub_ctx *sctx = sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) struct btrfs_header *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) u8 calculated_csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) u8 on_disk_csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) struct scrub_page *spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) char *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) BUG_ON(sblock->page_count < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) spage = sblock->pagev[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) kaddr = page_address(spage->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) h = (struct btrfs_header *)kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) memcpy(on_disk_csum, h->csum, sctx->csum_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) * we don't use the getter functions here, as we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) * a) don't have an extent buffer and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) * b) the page is already kmapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) if (spage->logical != btrfs_stack_header_bytenr(h))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) sblock->header_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) if (spage->generation != btrfs_stack_header_generation(h)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) sblock->header_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) sblock->generation_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) if (!scrub_check_fsid(h->fsid, spage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) sblock->header_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) BTRFS_UUID_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) sblock->header_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) shash->tfm = fs_info->csum_shash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) crypto_shash_init(shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) PAGE_SIZE - BTRFS_CSUM_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) for (i = 1; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) kaddr = page_address(sblock->pagev[i]->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) crypto_shash_update(shash, kaddr, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) crypto_shash_final(shash, calculated_csum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) sblock->checksum_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) return sblock->header_error || sblock->checksum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864)
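/*
 * Validate one super block copy: bytenr, generation, fsid and checksum.
 * Errors are accounted in the scrub statistics and the per-device error
 * counters; the super block itself is not rewritten here.
 */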
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) static int scrub_checksum_super(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) struct btrfs_super_block *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) struct scrub_ctx *sctx = sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) u8 calculated_csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) struct scrub_page *spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) char *kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) int fail_gen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) int fail_cor = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) BUG_ON(sblock->page_count < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) spage = sblock->pagev[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) kaddr = page_address(spage->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) s = (struct btrfs_super_block *)kaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) if (spage->logical != btrfs_super_bytenr(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) ++fail_cor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) if (spage->generation != btrfs_super_generation(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) ++fail_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) if (!scrub_check_fsid(s->fsid, spage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) ++fail_cor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) shash->tfm = fs_info->csum_shash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) crypto_shash_init(shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) if (memcmp(calculated_csum, s->csum, sctx->csum_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) ++fail_cor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) if (fail_cor + fail_gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) * If we find an error in a super block, we just report it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) * The super blocks will get rewritten with the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) * transaction commit anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) ++sctx->stat.super_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) if (fail_cor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) btrfs_dev_stat_inc_and_print(spage->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) BTRFS_DEV_STAT_CORRUPTION_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) btrfs_dev_stat_inc_and_print(spage->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) BTRFS_DEV_STAT_GENERATION_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) return fail_cor + fail_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) static void scrub_block_get(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) refcount_inc(&sblock->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) static void scrub_block_put(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (refcount_dec_and_test(&sblock->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) if (sblock->sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) scrub_parity_put(sblock->sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) for (i = 0; i < sblock->page_count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) scrub_page_put(sblock->pagev[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) kfree(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) static void scrub_page_get(struct scrub_page *spage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) atomic_inc(&spage->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) static void scrub_page_put(struct scrub_page *spage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) if (atomic_dec_and_test(&spage->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) if (spage->page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) __free_page(spage->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) kfree(spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)
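/* Submit the read bio that is currently being filled, if any. */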
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) static void scrub_submit(struct scrub_ctx *sctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) struct scrub_bio *sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) if (sctx->curr == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) sbio = sctx->bios[sctx->curr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) sctx->curr = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) scrub_pending_bio_inc(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) btrfsic_submit_bio(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964)
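/*
 * Add @spage to the read bio that is currently being filled.  Waits for a
 * free scrub_bio when none is available and submits the pending bio when
 * it is full or the page is not contiguous with it (by physical address,
 * logical address or device).
 */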
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) struct scrub_page *spage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) struct scrub_block *sblock = spage->sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) struct scrub_bio *sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) * grab a fresh bio or wait for one to become available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) while (sctx->curr == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) spin_lock(&sctx->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) sctx->curr = sctx->first_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (sctx->curr != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) sctx->first_free = sctx->bios[sctx->curr]->next_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) sctx->bios[sctx->curr]->next_free = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) sctx->bios[sctx->curr]->page_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) spin_unlock(&sctx->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) spin_unlock(&sctx->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) wait_event(sctx->list_wait, sctx->first_free != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) sbio = sctx->bios[sctx->curr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) if (sbio->page_count == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) sbio->physical = spage->physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) sbio->logical = spage->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) sbio->dev = spage->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) bio = sbio->bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (!bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) sbio->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) bio->bi_private = sbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) bio->bi_end_io = scrub_bio_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) bio_set_dev(bio, sbio->dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) bio->bi_iter.bi_sector = sbio->physical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) bio->bi_opf = REQ_OP_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) sbio->status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) spage->physical ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) sbio->logical + sbio->page_count * PAGE_SIZE !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) spage->logical ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) sbio->dev != spage->dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) sbio->pagev[sbio->page_count] = spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) if (ret != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (sbio->page_count < 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) bio_put(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) sbio->bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) scrub_block_get(sblock); /* one for the page added to the bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) atomic_inc(&sblock->outstanding_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) sbio->page_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) if (sbio->page_count == sctx->pages_per_rd_bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) static void scrub_missing_raid56_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) struct scrub_block *sblock = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) if (bio->bi_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) sblock->no_io_error_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) static void scrub_missing_raid56_worker(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) struct scrub_block *sblock = container_of(work, struct scrub_block, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) struct scrub_ctx *sctx = sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) logical = sblock->pagev[0]->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) dev = sblock->pagev[0]->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (sblock->no_io_error_seen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) scrub_recheck_block_checksum(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) if (!sblock->no_io_error_seen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) sctx->stat.read_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) btrfs_err_rl_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) "IO error rebuilding logical %llu for dev %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) logical, rcu_str_deref(dev->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) } else if (sblock->header_error || sblock->checksum_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) btrfs_err_rl_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) "failed to rebuild valid logical %llu for dev %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) logical, rcu_str_deref(dev->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) scrub_write_block_to_dev_replace(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) if (sctx->is_dev_replace && sctx->flush_all_writes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) scrub_pending_bio_dec(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
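/*
 * Read a block that sits on a missing device by asking the RAID5/6 layer
 * to rebuild it from the remaining stripes.  The rebuilt data is verified
 * (and written to the dev-replace target) in scrub_missing_raid56_worker().
 */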
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) static void scrub_missing_raid56_pages(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) struct scrub_ctx *sctx = sblock->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) u64 length = sblock->page_count * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) u64 logical = sblock->pagev[0]->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) btrfs_bio_counter_inc_blocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) &length, &bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) if (ret || !bbio || !bbio->raid_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) goto bbio_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) if (WARN_ON(!sctx->is_dev_replace ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) * We shouldn't be scrubbing a missing device. Even for dev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) * replace, we should only get here for RAID 5/6. We either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) * managed to mount something with no mirrors remaining or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) * there's a bug in scrub_remap_extent()/btrfs_map_block().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) goto bbio_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) bio = btrfs_io_bio_alloc(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) bio->bi_iter.bi_sector = logical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) bio->bi_private = sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) bio->bi_end_io = scrub_missing_raid56_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (!rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) goto rbio_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) for (i = 0; i < sblock->page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) struct scrub_page *spage = sblock->pagev[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) raid56_add_scrub_pages(rbio, spage->page, spage->logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) scrub_block_get(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) scrub_pending_bio_inc(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) raid56_submit_missing_rbio(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) rbio_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) bbio_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
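/*
 * Allocate a scrub_block for [logical, logical + len), split it into
 * PAGE_SIZE sized scrub_pages and queue them for reading (or for a
 * RAID5/6 rebuild when the device is missing).
 */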
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) u64 physical, struct btrfs_device *dev, u64 flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) u64 gen, int mirror_num, u8 *csum, int force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) u64 physical_for_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) struct scrub_block *sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) if (!sblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) /* one ref inside this function, plus one for each page added to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * a bio later on */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) refcount_set(&sblock->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) sblock->sctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) sblock->no_io_error_seen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) for (index = 0; len > 0; index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) struct scrub_page *spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) u64 l = min_t(u64, len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) spage = kzalloc(sizeof(*spage), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) if (!spage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) leave_nomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) scrub_page_get(spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) sblock->pagev[index] = spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) spage->sblock = sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) spage->dev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) spage->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) spage->generation = gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) spage->logical = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) spage->physical = physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) spage->physical_for_dev_replace = physical_for_dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) spage->mirror_num = mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (csum) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) spage->have_csum = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) memcpy(spage->csum, csum, sctx->csum_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) spage->have_csum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) sblock->page_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) spage->page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) if (!spage->page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) goto leave_nomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) len -= l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) logical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) physical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) physical_for_dev_replace += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) WARN_ON(sblock->page_count == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) * This case should only be hit for RAID 5/6 device replace. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) * the comment in scrub_missing_raid56_pages() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) scrub_missing_raid56_pages(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) for (index = 0; index < sblock->page_count; index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) struct scrub_page *spage = sblock->pagev[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) ret = scrub_add_page_to_rd_bio(sctx, spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) if (force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) /* The last ref frees the block, either here or in the bio completion for the last page. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) static void scrub_bio_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) struct scrub_bio *sbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) sbio->status = bio->bi_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) sbio->bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) static void scrub_bio_end_io_worker(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) struct scrub_ctx *sctx = sbio->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) if (sbio->status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) for (i = 0; i < sbio->page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) struct scrub_page *spage = sbio->pagev[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) spage->io_error = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) spage->sblock->no_io_error_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) /* now complete the scrub_block items that have all pages completed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) for (i = 0; i < sbio->page_count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) struct scrub_page *spage = sbio->pagev[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) struct scrub_block *sblock = spage->sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) if (atomic_dec_and_test(&sblock->outstanding_pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) scrub_block_complete(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) bio_put(sbio->bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) sbio->bio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) spin_lock(&sctx->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) sbio->next_free = sctx->first_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) sctx->first_free = sbio->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) spin_unlock(&sctx->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) if (sctx->is_dev_replace && sctx->flush_all_writes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) scrub_pending_bio_dec(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)
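/*
 * Mark the sectors covered by [start, start + len) in the given per-stripe
 * bitmap.  The range may wrap around the end of the stripe, in which case
 * both the tail and the head of the bitmap are set.
 */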
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) unsigned long *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) u64 start, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) u64 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) u64 nsectors64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) u32 nsectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) int sectorsize = sparity->sctx->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) if (len >= sparity->stripe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) bitmap_set(bitmap, 0, sparity->nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) start -= sparity->logic_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) start = div64_u64_rem(start, sparity->stripe_len, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) offset = div_u64(offset, sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) nsectors64 = div_u64(len, sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) ASSERT(nsectors64 < UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) nsectors = (u32)nsectors64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) if (offset + nsectors <= sparity->nsectors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) bitmap_set(bitmap, offset, nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) bitmap_set(bitmap, offset, sparity->nsectors - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) u64 start, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) u64 start, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
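/*
 * Called once all pages of a block have completed: start repair on I/O
 * errors, otherwise verify the checksums, copy the block to the dev-replace
 * target when appropriate, and record uncorrected sectors in the parity
 * error bitmap for RAID5/6 scrubs.
 */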
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) static void scrub_block_complete(struct scrub_block *sblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) int corrupted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) if (!sblock->no_io_error_seen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) corrupted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) scrub_handle_errored_block(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) * If the block has a checksum error, it is written via the repair
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * mechanism in the dev-replace case, otherwise write it here in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) * the dev-replace case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) corrupted = scrub_checksum(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) if (!corrupted && sblock->sctx->is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) scrub_write_block_to_dev_replace(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) if (sblock->sparity && corrupted && !sblock->data_corrected) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) u64 start = sblock->pagev[0]->logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) u64 end = sblock->pagev[sblock->page_count - 1]->logical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) scrub_parity_mark_sectors_error(sblock->sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) start, end - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366)
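/*
 * Look up the data checksum for @logical in sctx->csum_list.  Entries that
 * end before @logical are dropped as the list is walked.  Returns 1 and
 * copies the checksum into @csum when found, 0 otherwise.
 */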
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) struct btrfs_ordered_sum *sum = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) unsigned long num_sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) while (!list_empty(&sctx->csum_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) sum = list_first_entry(&sctx->csum_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) struct btrfs_ordered_sum, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) if (sum->bytenr > logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) if (sum->bytenr + sum->len > logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) ++sctx->stat.csum_discards;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) list_del(&sum->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) kfree(sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) sum = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) if (!sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) ASSERT(index < UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) num_sectors = sum->len / sctx->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (index == num_sectors - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) list_del(&sum->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) kfree(sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) /* scrub extent tries to collect up to 64 kB for each bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) u64 logical, u64 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) u64 physical, struct btrfs_device *dev, u64 flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) u64 gen, int mirror_num, u64 physical_for_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) u8 csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) u32 blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (flags & BTRFS_EXTENT_FLAG_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) blocksize = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) blocksize = sctx->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) sctx->stat.data_extents_scrubbed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) sctx->stat.data_bytes_scrubbed += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) blocksize = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) blocksize = sctx->fs_info->nodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) sctx->stat.tree_extents_scrubbed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) sctx->stat.tree_bytes_scrubbed += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) blocksize = sctx->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) while (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) u64 l = min_t(u64, len, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) int have_csum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) if (flags & BTRFS_EXTENT_FLAG_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) /* push csums to sbio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) have_csum = scrub_find_csum(sctx, logical, csum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) if (have_csum == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) ++sctx->stat.no_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) mirror_num, have_csum ? csum : NULL, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) physical_for_dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) len -= l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) logical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) physical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) physical_for_dev_replace += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
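/*
 * Build a scrub_block for a piece of a data stripe that is covered by a
 * RAID5/6 parity scrub and queue its pages for reading.
 *
 * Every scrub_page gets two references: one held by the scrub_block
 * (pagev[]) and one held by the scrub_parity page list, so a page stays
 * around until both its read has completed and the parity check is done.
 */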
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) static int scrub_pages_for_parity(struct scrub_parity *sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) u64 logical, u64 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) u64 physical, struct btrfs_device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) u64 flags, u64 gen, int mirror_num, u8 *csum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) struct scrub_ctx *sctx = sparity->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) struct scrub_block *sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) if (!sblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) * One ref inside this function, plus one for each page added to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) * a bio later on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) refcount_set(&sblock->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) sblock->sctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) sblock->no_io_error_seen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) sblock->sparity = sparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) scrub_parity_get(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) for (index = 0; len > 0; index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) struct scrub_page *spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) u64 l = min_t(u64, len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) spage = kzalloc(sizeof(*spage), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) if (!spage) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) leave_nomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) /* For scrub block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) scrub_page_get(spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) sblock->pagev[index] = spage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) /* For scrub parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) scrub_page_get(spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) list_add_tail(&spage->list, &sparity->spages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) spage->sblock = sblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) spage->dev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) spage->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) spage->generation = gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) spage->logical = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) spage->physical = physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) spage->mirror_num = mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) if (csum) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) spage->have_csum = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) memcpy(spage->csum, csum, sctx->csum_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) spage->have_csum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) sblock->page_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) spage->page = alloc_page(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) if (!spage->page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) goto leave_nomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) len -= l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) logical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) physical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) WARN_ON(sblock->page_count == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) for (index = 0; index < sblock->page_count; index++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) struct scrub_page *spage = sblock->pagev[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) ret = scrub_add_page_to_rd_bio(sctx, spage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) /* last one frees, either here or in bio completion for last page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) scrub_block_put(sblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540)
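/*
 * Scrub one extent that falls inside a RAID5/6 full stripe.
 *
 * If the device is missing, the covered sectors are only marked in the
 * error bitmap (and thereby excluded from the later parity check). Data
 * sectors without a checksum are skipped without queueing any read.
 */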
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) static int scrub_extent_for_parity(struct scrub_parity *sparity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) u64 logical, u64 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) u64 physical, struct btrfs_device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) u64 flags, u64 gen, int mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) struct scrub_ctx *sctx = sparity->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) u8 csum[BTRFS_CSUM_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) u32 blocksize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) scrub_parity_mark_sectors_error(sparity, logical, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) if (flags & BTRFS_EXTENT_FLAG_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) blocksize = sparity->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) blocksize = sparity->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) blocksize = sctx->fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) while (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) u64 l = min_t(u64, len, blocksize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) int have_csum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) if (flags & BTRFS_EXTENT_FLAG_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) /* push csums to sbio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) have_csum = scrub_find_csum(sctx, logical, csum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (have_csum == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) flags, gen, mirror_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) have_csum ? csum : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) len -= l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) logical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) physical += l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) * Given a physical address, this will calculate its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) * logical offset. If this is a parity stripe, it will return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * the left-most data stripe's logical offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) * Return 0 if it is a data stripe, 1 if it is a parity stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) */
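/*
 * @physical is a physical byte offset on the device at index @num of the
 * chunk described by @map. The returned *@offset is relative to the
 * start of the chunk, so callers add the chunk's logical base to it.
 */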
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) static int get_raid56_logic_offset(u64 physical, int num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) struct map_lookup *map, u64 *offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) u64 *stripe_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) int j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) u64 stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) u64 last_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) u32 stripe_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) u32 rot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) const int data_stripes = nr_data_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) last_offset = (physical - map->stripes[num].physical) * data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) if (stripe_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) *stripe_start = last_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) *offset = last_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) for (i = 0; i < data_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) *offset = last_offset + i * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) stripe_nr = div64_u64(*offset, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) stripe_nr = div_u64(stripe_nr, data_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) /* Work out the disk rotation on this stripe-set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) /* calculate which stripe this data is located on */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) rot += i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) stripe_index = rot % map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) if (stripe_index == num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) if (stripe_index < num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) *offset = last_offset + j * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) static void scrub_free_parity(struct scrub_parity *sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) struct scrub_ctx *sctx = sparity->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) struct scrub_page *curr, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) int nbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) if (nbits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) sctx->stat.read_errors += nbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) sctx->stat.uncorrectable_errors += nbits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) list_for_each_entry_safe(curr, next, &sparity->spages, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) list_del_init(&curr->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) scrub_page_put(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) kfree(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) struct scrub_parity *sparity = container_of(work, struct scrub_parity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) struct scrub_ctx *sctx = sparity->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) scrub_free_parity(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) scrub_pending_bio_dec(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) static void scrub_parity_bio_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) if (bio->bi_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) sparity->nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679)
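/*
 * Start the actual parity check for a scrub_parity once all of its data
 * stripes have been read.
 *
 * Sectors that already failed (ebitmap) are removed from dbitmap first;
 * if nothing is left, there is no parity worth verifying. Otherwise a
 * scrub rbio covering the full stripe is built and submitted, and the
 * final cleanup happens in the bio end_io worker.
 */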
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) struct scrub_ctx *sctx = sparity->sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) struct btrfs_raid_bio *rbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) u64 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) sparity->nsectors))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) length = sparity->logic_end - sparity->logic_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) btrfs_bio_counter_inc_blocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) &length, &bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) if (ret || !bbio || !bbio->raid_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) goto bbio_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) bio = btrfs_io_bio_alloc(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) bio->bi_iter.bi_sector = sparity->logic_start >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) bio->bi_private = sparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) bio->bi_end_io = scrub_parity_bio_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) length, sparity->scrub_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) sparity->dbitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) sparity->nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) if (!rbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) goto rbio_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) scrub_pending_bio_inc(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) raid56_parity_submit_scrub_rbio(rbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) rbio_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) bbio_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) sparity->nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) scrub_free_parity(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) static inline int scrub_calc_parity_bitmap_len(int nsectors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) static void scrub_parity_get(struct scrub_parity *sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) refcount_inc(&sparity->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) static void scrub_parity_put(struct scrub_parity *sparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) if (!refcount_dec_and_test(&sparity->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) scrub_parity_check_and_repair(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749)
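/*
 * Scrub all data extents that fall inside the RAID5/6 full stripe range
 * [logic_start, logic_end). Once the last reference to the scrub_parity
 * is dropped, the parity itself is checked (and repaired if needed) via
 * scrub_parity_check_and_repair().
 */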
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) struct map_lookup *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) struct btrfs_device *sdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) struct btrfs_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) u64 logic_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) u64 logic_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) struct btrfs_root *root = fs_info->extent_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) struct btrfs_root *csum_root = fs_info->csum_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) struct btrfs_extent_item *extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) u64 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) struct extent_buffer *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) u64 extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) u64 extent_physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) u64 extent_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) u64 mapped_length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) struct btrfs_device *extent_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) struct scrub_parity *sparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) int nsectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) int bitmap_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) int extent_mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) int stop_loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) if (!sparity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) sctx->stat.malloc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) sparity->stripe_len = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) sparity->nsectors = nsectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) sparity->sctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) sparity->scrub_dev = sdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) sparity->logic_start = logic_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) sparity->logic_end = logic_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) refcount_set(&sparity->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) INIT_LIST_HEAD(&sparity->spages);
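/*
 * The bitmap area allocated behind the struct is split in two:
 * dbitmap tracks which sectors of the stripe carry data that was
 * scrubbed, ebitmap tracks which sectors hit an error.
 */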
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) sparity->dbitmap = sparity->bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) while (logic_start < logic_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) key.type = BTRFS_METADATA_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) key.type = BTRFS_EXTENT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) key.objectid = logic_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) ret = btrfs_previous_extent_item(root, path, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) ret = btrfs_search_slot(NULL, root, &key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) stop_loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) u64 bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) l = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) if (slot >= btrfs_header_nritems(l)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) btrfs_item_key_to_cpu(l, &key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) if (key.type != BTRFS_EXTENT_ITEM_KEY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) key.type != BTRFS_METADATA_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (key.type == BTRFS_METADATA_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) bytes = fs_info->nodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) bytes = key.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) if (key.objectid + bytes <= logic_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) if (key.objectid >= logic_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) while (key.objectid >= logic_start + map->stripe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) logic_start += map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) extent = btrfs_item_ptr(l, slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) struct btrfs_extent_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) flags = btrfs_extent_flags(l, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) generation = btrfs_extent_generation(l, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) (key.objectid < logic_start ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) key.objectid + bytes >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) logic_start + map->stripe_len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) key.objectid, logic_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) extent_logical = key.objectid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) extent_len = bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) if (extent_logical < logic_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) extent_len -= logic_start - extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) extent_logical = logic_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) if (extent_logical + extent_len >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) logic_start + map->stripe_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) extent_len = logic_start + map->stripe_len -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) scrub_parity_mark_sectors_data(sparity, extent_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) extent_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) mapped_length = extent_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) extent_logical, &mapped_length, &bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) if (!bbio || mapped_length < extent_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) extent_physical = bbio->stripes[0].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) extent_mirror_num = bbio->mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) extent_dev = bbio->stripes[0].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) ret = btrfs_lookup_csums_range(csum_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) extent_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) extent_logical + extent_len - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) &sctx->csum_list, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) ret = scrub_extent_for_parity(sparity, extent_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) extent_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) extent_physical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) extent_dev, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) generation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) extent_mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) scrub_free_csums(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) if (extent_logical + extent_len <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) key.objectid + bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) logic_start += map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) if (logic_start >= logic_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) if (logic_start < key.objectid + bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) path->slots[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) if (stop_loop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) logic_start += map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) scrub_parity_mark_sectors_error(sparity, logic_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) logic_end - logic_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) scrub_parity_put(sparity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) return ret < 0 ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
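/*
 * Scrub one device stripe: stripe @num of the chunk described by @map,
 * which lives on @scrub_dev. All extent items that fall into this
 * device's part of the chunk are looked up (from the commit root) and
 * queued for reading and checksum verification; on RAID5/6, parity
 * stripes are handed off to scrub_raid56_parity().
 */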
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) struct map_lookup *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) struct btrfs_device *scrub_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) int num, u64 base, u64 length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) struct btrfs_block_group *cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) struct btrfs_path *path, *ppath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) struct btrfs_root *root = fs_info->extent_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) struct btrfs_root *csum_root = fs_info->csum_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) struct btrfs_extent_item *extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) u64 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) u64 nstripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) struct extent_buffer *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) u64 physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) u64 logic_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) u64 physical_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) u64 generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) int mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) struct reada_control *reada1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) struct reada_control *reada2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) struct btrfs_key key_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) u64 increment = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) u64 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) u64 extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) u64 extent_physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) u64 extent_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) u64 stripe_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) u64 stripe_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) struct btrfs_device *extent_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) int extent_mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) int stop_loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) physical = map->stripes[num].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) nstripes = div64_u64(length, map->stripe_len);
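/*
 * Per RAID profile, work out where this device's first stripe starts
 * inside the chunk (offset), how far apart two consecutive stripes of
 * this device are in the chunk's logical address space (increment) and
 * which copy of the data this device holds (mirror_num).
 */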
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) offset = map->stripe_len * num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) increment = map->stripe_len * map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) int factor = map->num_stripes / map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) offset = map->stripe_len * (num / map->sub_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) increment = map->stripe_len * factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) mirror_num = num % map->sub_stripes + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) increment = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) mirror_num = num % map->num_stripes + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) increment = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) mirror_num = num % map->num_stripes + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) get_raid56_logic_offset(physical, num, map, &offset, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) increment = map->stripe_len * nr_data_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) increment = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) ppath = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) if (!ppath) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) * Work on the commit root. The related disk blocks are static as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) * long as COW is applied. This means it is safe to rewrite
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) * them to repair disk errors without any race conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) path->search_commit_root = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) path->skip_locking = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) ppath->search_commit_root = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) ppath->skip_locking = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) * Trigger the readahead for the extent tree and the csum tree and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) * wait for completion. During readahead, the scrub is officially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) * paused so that it does not hold off transaction commits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) logical = base + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) physical_end = physical + nstripes * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) get_raid56_logic_offset(physical_end, num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) map, &logic_end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) logic_end += base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) logic_end = logical + increment * nstripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) wait_event(sctx->list_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) atomic_read(&sctx->bios_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) scrub_blocked_if_needed(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) /* FIXME it might be better to start readahead at commit root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) key.objectid = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) key.type = BTRFS_EXTENT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) key.offset = (u64)0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) key_end.objectid = logic_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) key_end.type = BTRFS_METADATA_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) key_end.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) reada1 = btrfs_reada_add(root, &key, &key_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) key.type = BTRFS_EXTENT_CSUM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) key.offset = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) key_end.type = BTRFS_EXTENT_CSUM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) key_end.offset = logic_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) reada2 = btrfs_reada_add(csum_root, &key, &key_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) if (!IS_ERR(reada1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) btrfs_reada_wait(reada1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) if (!IS_ERR(reada2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) btrfs_reada_wait(reada2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) * Collect all data csums for the stripe to avoid seeking during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) * the scrub. This might currently (crc32) end up being about 1MB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) * now find all extents for each stripe and scrub them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) while (physical < physical_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) * canceled?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) if (atomic_read(&fs_info->scrub_cancel_req) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) atomic_read(&sctx->cancel_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) * check to see if we have to pause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) if (atomic_read(&fs_info->scrub_pause_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) /* push queued extents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) sctx->flush_all_writes = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) wait_event(sctx->list_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) atomic_read(&sctx->bios_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) sctx->flush_all_writes = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) scrub_blocked_if_needed(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) ret = get_raid56_logic_offset(physical, num, map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) &logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) &stripe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) logical += base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) /* it is a parity stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) stripe_logical += base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) stripe_end = stripe_logical + increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) ret = scrub_raid56_parity(sctx, map, scrub_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) ppath, stripe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) stripe_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) key.type = BTRFS_METADATA_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) key.type = BTRFS_EXTENT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) key.objectid = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) ret = btrfs_previous_extent_item(root, path, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) /* there's no smaller item, so stick with the larger one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) ret = btrfs_search_slot(NULL, root, &key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) stop_loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) u64 bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) l = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) if (slot >= btrfs_header_nritems(l)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) btrfs_item_key_to_cpu(l, &key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) if (key.type != BTRFS_EXTENT_ITEM_KEY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) key.type != BTRFS_METADATA_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (key.type == BTRFS_METADATA_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) bytes = fs_info->nodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) bytes = key.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) if (key.objectid + bytes <= logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) if (key.objectid >= logical + map->stripe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) /* out of this device extent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) if (key.objectid >= logic_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) * If our block group was removed in the meanwhile, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) * stop scrubbing since there is no point in continuing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) * Continuing would prevent reusing its device extents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) * for new block groups for a long time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) spin_lock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) if (cache->removed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) extent = btrfs_item_ptr(l, slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) struct btrfs_extent_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) flags = btrfs_extent_flags(l, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) generation = btrfs_extent_generation(l, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) (key.objectid < logical ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) key.objectid + bytes >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) logical + map->stripe_len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) key.objectid, logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) sctx->stat.uncorrectable_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248)
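			/*
			 * An extent may span several stripes of this chunk;
			 * handle it one stripe-sized piece at a time,
			 * advancing @logical and @physical and jumping back
			 * to "again" until the whole extent has been
			 * scrubbed.
			 */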
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) extent_logical = key.objectid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) extent_len = bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) * trim extent to this stripe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) if (extent_logical < logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) extent_len -= logical - extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) extent_logical = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) if (extent_logical + extent_len >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) logical + map->stripe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) extent_len = logical + map->stripe_len -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) extent_logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) extent_physical = extent_logical - logical + physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) extent_dev = scrub_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) extent_mirror_num = mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) if (sctx->is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) scrub_remap_extent(fs_info, extent_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) extent_len, &extent_physical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) &extent_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) &extent_mirror_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) if (flags & BTRFS_EXTENT_FLAG_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) ret = btrfs_lookup_csums_range(csum_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) extent_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) extent_logical + extent_len - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) &sctx->csum_list, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) ret = scrub_extent(sctx, map, extent_logical, extent_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) extent_physical, extent_dev, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) generation, extent_mirror_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) extent_logical - logical + physical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) scrub_free_csums(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) if (extent_logical + extent_len <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) key.objectid + bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
				/*
				 * Loop until we find the next data stripe
				 * or we have finished all stripes.
				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) physical += map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) ret = get_raid56_logic_offset(physical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) num, map, &logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) &stripe_logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) logical += base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) if (ret && physical < physical_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) stripe_logical += base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) stripe_end = stripe_logical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) ret = scrub_raid56_parity(sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) map, scrub_dev, ppath,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) stripe_logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) stripe_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) goto loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) physical += map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) logical += increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) if (logical < key.objectid + bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) if (physical >= physical_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) stop_loop = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) path->slots[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) logical += increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) physical += map->stripe_len;
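		/*
		 * Record how far we got on this device; the value is
		 * reported back to user space in the scrub progress
		 * statistics.
		 */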
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) spin_lock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) if (stop_loop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) sctx->stat.last_physical = map->stripes[num].physical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) sctx->stat.last_physical = physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) spin_unlock(&sctx->stat_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) if (stop_loop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) /* push queued extents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) btrfs_free_path(ppath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) return ret < 0 ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363)
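/*
 * Scrub one chunk: look up the chunk mapping at @chunk_offset and scrub each
 * stripe of it that is stored on @scrub_dev at @dev_offset.
 */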
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) struct btrfs_device *scrub_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) u64 chunk_offset, u64 length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) u64 dev_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) struct btrfs_block_group *cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) struct extent_map_tree *map_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) read_lock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) em = lookup_extent_mapping(map_tree, chunk_offset, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) read_unlock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) if (!em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) * Might have been an unused block group deleted by the cleaner
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) * kthread or relocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) spin_lock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) if (!cache->removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393)
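	/*
	 * The mapping must start exactly at the chunk offset and cover at
	 * least @length bytes, otherwise it does not match the dev extent we
	 * were asked to scrub and there is nothing to do here.
	 */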
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) if (em->start != chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) if (em->len < length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) for (i = 0; i < map->num_stripes; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) map->stripes[i].physical == dev_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) ret = scrub_stripe(sctx, map, scrub_dev, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) chunk_offset, length, cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415)
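/*
 * Walk all dev extents of @scrub_dev in the device tree that intersect the
 * range [@start, @end) and scrub the corresponding block groups one by one.
 */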
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) static noinline_for_stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) int scrub_enumerate_chunks(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) struct btrfs_device *scrub_dev, u64 start, u64 end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) struct btrfs_dev_extent *dev_extent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) u64 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) u64 chunk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) int ro_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) struct extent_buffer *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) struct btrfs_block_group *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) path->reada = READA_FORWARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) path->search_commit_root = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) path->skip_locking = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) key.objectid = scrub_dev->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) key.offset = 0ull;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446)
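	/*
	 * Each iteration searches the device tree again, resuming right
	 * after the last dev extent that was processed (see the "skip"
	 * label below).
	 */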
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (path->slots[0] >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) btrfs_header_nritems(path->nodes[0])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) l = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) btrfs_item_key_to_cpu(l, &found_key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) if (found_key.objectid != scrub_dev->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) if (found_key.type != BTRFS_DEV_EXTENT_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) if (found_key.offset >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) if (found_key.offset < key.offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) length = btrfs_dev_extent_length(l, dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) if (found_key.offset + length <= start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) * get a reference on the corresponding block group to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) * the chunk from going away while we scrub it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) cache = btrfs_lookup_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496)
		/*
		 * Some chunks are removed but not committed to disk yet,
		 * continue scrubbing.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) if (!cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) * Make sure that while we are scrubbing the corresponding block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) * group doesn't get its logical address and its device extents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) * reused for another block group, which can possibly be of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) * different type and different profile. We do this to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) * false error detections and crashes due to bogus attempts to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) * repair extents.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) spin_lock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) if (cache->removed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) btrfs_freeze_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518)
		/*
		 * We need to call btrfs_inc_block_group_ro() with the scrub
		 * paused (scrubs_paused elevated), to avoid a deadlock
		 * caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) scrub_pause_on(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528)
		/*
		 * Don't do chunk preallocation for scrub.
		 *
		 * This is especially important for SYSTEM bgs, or we can hit
		 * -EFBIG from btrfs_finish_chunk_alloc() like:
		 * 1. The only SYSTEM bg is marked RO.
		 *    Since the SYSTEM bg is small, that's pretty common.
		 * 2. A new SYSTEM bg will be allocated
		 *    Because the regular version (with chunk allocation)
		 *    would allocate a new chunk.
		 * 3. The new SYSTEM bg is empty and will get cleaned up
		 *    Before the cleanup really happens, it's marked RO again.
		 * 4. The empty SYSTEM bg gets scrubbed
		 *    We go back to 2.
		 *
		 * This can easily boost the number of SYSTEM chunks if the
		 * cleaner thread can't be triggered fast enough, and use up
		 * all the space of btrfs_super_block::sys_chunk_array.
		 *
		 * While for dev replace, we need to try our best to mark the
		 * block group RO, to prevent a race between:
		 * - Write duplication
		 *   Contains the latest data
		 * - Scrub copy
		 *   Contains data from the commit tree
		 *
		 * If the target block group is not marked RO, nocow writes
		 * can be overwritten by the scrub copy, causing data
		 * corruption.
		 * So for dev-replace, it's not allowed to continue if a
		 * block group is not RO.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) ro_set = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * fails to create a new chunk for metadata.
			 * That is not a problem for scrub, because metadata
			 * is always COWed, and our scrub pauses transaction
			 * commits.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) ro_set = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) } else if (ret == -ETXTBSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) "skipping scrub of block group %llu due to active swapfile",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) cache->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) scrub_pause_off(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) goto skip_unfreeze;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) "failed setting block group ro: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) btrfs_unfreeze_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) scrub_pause_off(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586)
		/*
		 * Now that the target block group is marked RO, wait for
		 * nocow writes to finish before dev-replace.
		 * COW is fine, as COW never overwrites extents in the commit
		 * tree.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) if (sctx->is_dev_replace) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) btrfs_wait_nocow_writers(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) cache->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) scrub_pause_off(fs_info);
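		/*
		 * Publish the range of this dev extent in the dev-replace
		 * item: update the cursors and mark the item for writeback.
		 */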
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) down_write(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) dev_replace->cursor_right = found_key.offset + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) dev_replace->cursor_left = found_key.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) dev_replace->item_needs_writeback = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) up_write(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) found_key.offset, cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607)
		/*
		 * Flush and submit all pending read and write bios, and
		 * afterwards wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) sctx->flush_all_writes = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) scrub_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) mutex_lock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) scrub_wr_submit(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) mutex_unlock(&sctx->wr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) wait_event(sctx->list_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) atomic_read(&sctx->bios_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) scrub_pause_on(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628)
		/*
		 * This must be done before we decrease @scrub_paused.
		 * Make sure we don't block a transaction commit while we are
		 * waiting for the pending workers to finish.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) wait_event(sctx->list_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) atomic_read(&sctx->workers_pending) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) sctx->flush_all_writes = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) scrub_pause_off(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) down_write(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) dev_replace->cursor_left = dev_replace->cursor_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) dev_replace->item_needs_writeback = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) up_write(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) if (ro_set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) btrfs_dec_block_group_ro(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) * We might have prevented the cleaner kthread from deleting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) * this block group if it was already unused because we raced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) * and set it to RO mode first. So add it back to the unused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) * list, otherwise it might not ever be deleted unless a manual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) * balance is triggered or it becomes used and unused again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) spin_lock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) if (!cache->removed && !cache->ro && cache->reserved == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) cache->used == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) btrfs_discard_queue_work(&fs_info->discard_ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) btrfs_mark_bg_unused(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) spin_unlock(&cache->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) skip_unfreeze:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) btrfs_unfreeze_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) if (sctx->is_dev_replace &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) atomic64_read(&dev_replace->num_write_errors) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) if (sctx->stat.malloc_errors > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) key.offset = found_key.offset + length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690)
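/*
 * Scrub all super block copies of @scrub_dev that fit within the committed
 * device size (commit_total_bytes).
 */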
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) struct btrfs_device *scrub_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) u64 bytenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) u64 gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) struct btrfs_fs_info *fs_info = sctx->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702)
	/* Seed devices of a new filesystem have their own generation. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) if (scrub_dev->fs_devices != fs_info->fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) gen = scrub_dev->generation;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) gen = fs_info->last_trans_committed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) bytenr = btrfs_sb_offset(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) if (bytenr + BTRFS_SUPER_INFO_SIZE >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) scrub_dev->commit_total_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) NULL, 1, bytenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725)
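/*
 * Drop a reference on the scrub workqueues. The last reference clears the
 * fs_info pointers under scrub_lock and then destroys the workqueues outside
 * of the lock.
 */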
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) static void scrub_workers_put(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) &fs_info->scrub_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) struct btrfs_workqueue *scrub_workers = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) struct btrfs_workqueue *scrub_wr_comp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) struct btrfs_workqueue *scrub_parity = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) scrub_workers = fs_info->scrub_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) scrub_wr_comp = fs_info->scrub_wr_completion_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) scrub_parity = fs_info->scrub_parity_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) fs_info->scrub_workers = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) fs_info->scrub_wr_completion_workers = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) fs_info->scrub_parity_workers = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) btrfs_destroy_workqueue(scrub_workers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) btrfs_destroy_workqueue(scrub_wr_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) btrfs_destroy_workqueue(scrub_parity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748)
/*
 * Get a reference count on fs_info->scrub_workers. Start the workers if
 * necessary.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) int is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) struct btrfs_workqueue *scrub_workers = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) struct btrfs_workqueue *scrub_wr_comp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) struct btrfs_workqueue *scrub_parity = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) int max_active = fs_info->thread_pool_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) is_dev_replace ? 1 : max_active, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) if (!scrub_workers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) goto fail_scrub_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) max_active, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) if (!scrub_wr_comp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) goto fail_scrub_wr_completion_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) max_active, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) if (!scrub_parity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) goto fail_scrub_parity_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779)
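	/*
	 * The workqueues above were allocated without holding scrub_lock.
	 * Install them only if nobody created workers in the meantime,
	 * otherwise keep the existing ones and free ours below.
	 */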
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) mutex_lock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) ASSERT(fs_info->scrub_workers == NULL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) fs_info->scrub_wr_completion_workers == NULL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) fs_info->scrub_parity_workers == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) fs_info->scrub_workers = scrub_workers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) fs_info->scrub_wr_completion_workers = scrub_wr_comp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) fs_info->scrub_parity_workers = scrub_parity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) refcount_set(&fs_info->scrub_workers_refcnt, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) }
	/* Another thread raced in and created the workers for us. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) refcount_inc(&fs_info->scrub_workers_refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) btrfs_destroy_workqueue(scrub_parity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) fail_scrub_parity_workers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) btrfs_destroy_workqueue(scrub_wr_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) fail_scrub_wr_completion_workers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) btrfs_destroy_workqueue(scrub_workers);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) fail_scrub_workers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805)
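/*
 * Start a scrub (or the scrub part of a dev-replace) on the device @devid,
 * walking its dev extents in the [@start, @end) device offset range.
 * If @progress is not NULL, it receives the final statistics.
 */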
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) u64 end, struct btrfs_scrub_progress *progress,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) int readonly, int is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) struct scrub_ctx *sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) unsigned int nofs_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) if (btrfs_fs_closing(fs_info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum,
		 * given the way scrub is implemented. Do not handle this
		 * situation at all because it won't ever happen.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) fs_info->nodesize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) BTRFS_STRIPE_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) if (fs_info->sectorsize != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) /* not supported for data w/o checksums */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) btrfs_err_rl(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) fs_info->sectorsize, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) if (fs_info->nodesize >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * This would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) fs_info->nodesize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) SCRUB_MAX_PAGES_PER_BLOCK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) fs_info->sectorsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) SCRUB_MAX_PAGES_PER_BLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) /* Allocate outside of device_list_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) sctx = scrub_setup_ctx(fs_info, is_dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) if (IS_ERR(sctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) return PTR_ERR(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) ret = scrub_workers_get(fs_info, is_dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) goto out_free_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) mutex_lock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) !is_dev_replace)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) if (!is_dev_replace && !readonly &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) btrfs_err_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) "scrub on devid %llu: filesystem on %s is not writable",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) devid, rcu_str_deref(dev->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) ret = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) mutex_lock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) down_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) if (dev->scrub_ctx ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) (!is_dev_replace &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) ret = -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) sctx->readonly = readonly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) dev->scrub_ctx = sctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907)
	/*
	 * By checking @scrub_pause_req here, we can avoid the race between
	 * transaction commit and scrubbing.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) __scrub_blocked_if_needed(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) atomic_inc(&fs_info->scrubs_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915)
	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done at scrub_pages() and scrub_pages_for_parity()
	 * invoked by our callees. The pausing request is done when the
	 * transaction commit starts, and it blocks the transaction until scrub
	 * is paused (done at specific points at scrub_stripe() or right above
	 * before incrementing fs_info->scrubs_running).
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) nofs_flag = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) if (!is_dev_replace) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * Holding the device list mutex here serializes us against
		 * the super block writes done during a log tree sync.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) mutex_lock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) ret = scrub_supers(sctx, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) ret = scrub_enumerate_chunks(sctx, dev, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) atomic_dec(&fs_info->scrubs_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) wake_up(&fs_info->scrub_pause_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) if (progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) memcpy(progress, &sctx->stat, sizeof(*progress));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) if (!is_dev_replace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) ret ? "not finished" : "finished", devid, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) mutex_lock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) dev->scrub_ctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) mutex_unlock(&fs_info->scrub_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) scrub_workers_put(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) scrub_put_ctx(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) scrub_workers_put(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) out_free_ctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) scrub_free_ctx(sctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969)
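/*
 * Ask all running scrubs to pause and wait until each of them has reached a
 * pause point (scrubs_paused == scrubs_running). Used by the transaction
 * commit path, see the btrfs_commit_transaction() -> btrfs_scrub_pause()
 * chain noted above.
 */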
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);
}

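/*
 * Counterpart of btrfs_scrub_pause(): drop the pause request and wake up
 * all scrubs waiting on scrub_pause_wait so they can resume.
 */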
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
{
	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
}

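/*
 * Cancel all scrubs running on this filesystem.  Returns -ENOTCONN if no
 * scrub is running, otherwise waits until every scrub has stopped and
 * returns 0.
 */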
int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

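/*
 * Cancel the scrub running on a single device.  Returns -ENOTCONN if the
 * device has no scrub in progress, otherwise waits until its scrub context
 * has been detached from the device and returns 0.
 */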
int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct scrub_ctx *sctx;

	mutex_lock(&fs_info->scrub_lock);
	sctx = dev->scrub_ctx;
	if (!sctx) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sctx->cancel_req);
	while (dev->scrub_ctx) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_ctx == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

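/*
 * Copy the current scrub statistics of the device identified by @devid into
 * @progress.  Returns -ENODEV if the device cannot be found and -ENOTCONN
 * if it has no scrub context attached.
 */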
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_ctx *sctx = NULL;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (dev)
		sctx = dev->scrub_ctx;
	if (sctx)
		memcpy(progress, &sctx->stat, sizeof(*progress));
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
}

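/*
 * Map a logical extent to the physical offset, device and mirror number of
 * its first stripe.  The output parameters are left untouched when the
 * mapping fails or does not cover the whole extent.
 */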
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num)
{
	u64 mapped_length;
	struct btrfs_bio *bbio = NULL;
	int ret;

	mapped_length = extent_len;
	ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
			      &mapped_length, &bbio, 0);
	if (ret || !bbio || mapped_length < extent_len ||
	    !bbio->stripes[0].dev->bdev) {
		btrfs_put_bbio(bbio);
		return;
	}

	*extent_physical = bbio->stripes[0].physical;
	*extent_mirror_num = bbio->mirror_num;
	*extent_dev = bbio->stripes[0].dev;
	btrfs_put_bbio(bbio);
}