^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2007 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/bio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/ratelimit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/raid/pq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/semaphore.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/uuid.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/list_sort.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/namei.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "misc.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "ctree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "extent_map.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include "disk-io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include "transaction.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include "print-tree.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include "volumes.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "raid56.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "async-thread.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "check-integrity.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include "rcu-string.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "dev-replace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include "sysfs.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include "tree-checker.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include "space-info.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include "block-group.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include "discard.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) [BTRFS_RAID_RAID10] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) .sub_stripes = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) .devs_max = 0, /* 0 == as many as possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) .devs_min = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) .tolerated_failures = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) .devs_increment = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) .ncopies = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) .raid_name = "raid10",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) [BTRFS_RAID_RAID1] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) .devs_max = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) .devs_min = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) .tolerated_failures = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) .devs_increment = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) .ncopies = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) .raid_name = "raid1",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) [BTRFS_RAID_RAID1C3] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) .devs_max = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) .devs_min = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) .tolerated_failures = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) .devs_increment = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) .ncopies = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) .raid_name = "raid1c3",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) [BTRFS_RAID_RAID1C4] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) .devs_max = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) .devs_min = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) .tolerated_failures = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) .devs_increment = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) .ncopies = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) .raid_name = "raid1c4",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) [BTRFS_RAID_DUP] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) .dev_stripes = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) .devs_max = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) .devs_min = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) .tolerated_failures = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) .devs_increment = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) .ncopies = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) .raid_name = "dup",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) .bg_flag = BTRFS_BLOCK_GROUP_DUP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) .mindev_error = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) [BTRFS_RAID_RAID0] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) .devs_max = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) .devs_min = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) .tolerated_failures = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) .devs_increment = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) .ncopies = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) .raid_name = "raid0",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) .mindev_error = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) [BTRFS_RAID_SINGLE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) .devs_max = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) .devs_min = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) .tolerated_failures = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) .devs_increment = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) .ncopies = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) .nparity = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) .raid_name = "single",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) .bg_flag = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) .mindev_error = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) [BTRFS_RAID_RAID5] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) .devs_max = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) .devs_min = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) .tolerated_failures = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) .devs_increment = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) .ncopies = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) .nparity = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) .raid_name = "raid5",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) [BTRFS_RAID_RAID6] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) .sub_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) .dev_stripes = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) .devs_max = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) .devs_min = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) .tolerated_failures = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) .devs_increment = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) .ncopies = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) .nparity = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) .raid_name = "raid6",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) const char *btrfs_bg_type_to_raid_name(u64 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) const int index = btrfs_bg_flags_to_raid_index(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) if (index >= BTRFS_NR_RAID_TYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) return btrfs_raid_array[index].raid_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * Fill @buf with textual description of @bg_flags, no more than @size_buf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) * bytes including terminating null byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) char *bp = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) u64 flags = bg_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) u32 size_bp = size_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) if (!flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) strcpy(bp, "NONE");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) #define DESCRIBE_FLAG(flag, desc) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) if (flags & (flag)) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) ret = snprintf(bp, size_bp, "%s|", (desc)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) if (ret < 0 || ret >= size_bp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) goto out_overflow; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) size_bp -= ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) bp += ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) flags &= ~(flag); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) btrfs_raid_array[i].raid_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) #undef DESCRIBE_FLAG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ret = snprintf(bp, size_bp, "0x%llx|", flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) size_bp -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) if (size_bp < size_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) buf[size_buf - size_bp - 1] = '\0'; /* remove last | */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * The text is trimmed, it's up to the caller to provide sufficiently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * large buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) out_overflow:;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
/* Prototypes for file-local (static) helpers defined elsewhere in this file. */
static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * Device locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * ==============
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) * There are several mutexes that protect manipulation of devices and low-level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) * structures like chunks but not block groups, extents or files
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) * uuid_mutex (global lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) * ------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) * protects the fs_uuids list that tracks all per-fs fs_devices, resulting from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) * the SCAN_DEV ioctl registration or from mount either implicitly (the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * device) or requested by the device= mount option
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * the mutex can be very coarse and can cover long-running operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * protects: updates to fs_devices counters like missing devices, rw devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * seeding, structure cloning, opening/closing devices at mount/umount time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * global::fs_devs - add, remove, updates to the global list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * does not protect: manipulation of the fs_devices::devices list in general
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * but in mount context it could be used to exclude list modifications by eg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * scan ioctl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * btrfs_device::name - renames (write side), read is RCU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * fs_devices::device_list_mutex (per-fs, with RCU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) * ------------------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) * protects updates to fs_devices::devices, ie. adding and deleting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) * simple list traversal with read-only actions can be done with RCU protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) * may be used to exclude some operations from running concurrently without any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) * modifications to the list (see write_all_supers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) * Is not required at mount and close times, because our device list is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) * protected by the uuid_mutex at that point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) * balance_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) * -------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) * protects balance structures (status, state) and context accessed from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) * several places (internally, ioctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) * chunk_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * -----------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) * protects chunks, adding or removing during allocation, trim or when a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) * device is added/removed. Additionally it also protects post_commit_list of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) * individual devices, since they can be added to the transaction's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) * post_commit_list only with chunk_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * cleaner_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * -------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) * a big lock that is held by the cleaner thread and prevents running subvolume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * cleaning together with relocation or delayed iputs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * Lock nesting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * ============
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) * uuid_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) * device_list_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) * chunk_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) * balance_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) * Exclusive operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) * ====================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) * Maintains the exclusivity of the following operations that apply to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * whole filesystem and cannot run in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * - Balance (*)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) * - Device add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) * - Device remove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) * - Device replace (*)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) * - Resize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) * The device operations (as above) can be in one of the following states:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) * - Running state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) * - Paused state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) * - Completed state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) * Only device operations marked with (*) can go into the Paused state for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) * following reasons:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * - ioctl (only Balance can be Paused through ioctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * - filesystem remounted as read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) * - filesystem unmounted and mounted as read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) * - system power-cycle and filesystem mounted as read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) * - filesystem or device errors leading to forced read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) * The status of exclusive operation is set and cleared atomically.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) * During the course of Paused state, fs_info::exclusive_operation remains set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) * A device operation in Paused or Running state can be canceled or resumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) * either by ioctl (Balance only) or when remounted as read-write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) * The exclusive status is cleared when the device operation is canceled or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) * completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
/* Global lock, see the "uuid_mutex" section of the locking notes above. */
DEFINE_MUTEX(uuid_mutex);
/* List of all registered btrfs_fs_devices, linked through their fs_list. */
static LIST_HEAD(fs_uuids);

/* Return the head of the file-local fs_uuids list. */
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * alloc_fs_devices - allocate struct btrfs_fs_devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) * @fsid: if not NULL, copy the UUID to fs_devices::fsid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) * @metadata_fsid: if not NULL, copy the UUID to fs_devices::metadata_fsid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) * The returned struct is not linked onto any lists and can be destroyed with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * kfree() right away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) const u8 *metadata_fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) struct btrfs_fs_devices *fs_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) if (!fs_devs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) mutex_init(&fs_devs->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) INIT_LIST_HEAD(&fs_devs->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) INIT_LIST_HEAD(&fs_devs->alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) INIT_LIST_HEAD(&fs_devs->fs_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) INIT_LIST_HEAD(&fs_devs->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) if (fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) if (metadata_fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) else if (fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) return fs_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
/*
 * Release the resources held by @device and free the structure itself.
 *
 * Frees the device name, releases the alloc_state extent io tree, drops the
 * reference on the preallocated flush bio and finally frees @device.  Warns
 * if @device is still linked on a post_commit_list.
 */
void btrfs_free_device(struct btrfs_device *device)
{
	WARN_ON(!list_empty(&device->post_commit_list));
	rcu_string_free(device->name);
	extent_io_tree_release(&device->alloc_state);
	bio_put(device->flush_bio);
	kfree(device);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) WARN_ON(fs_devices->opened);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) while (!list_empty(&fs_devices->devices)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) device = list_entry(fs_devices->devices.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) struct btrfs_device, dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) list_del(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) kfree(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) void __exit btrfs_cleanup_fs_uuids(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) while (!list_empty(&fs_uuids)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) fs_devices = list_entry(fs_uuids.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) struct btrfs_fs_devices, fs_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) list_del(&fs_devices->fs_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) * Returned struct is not linked onto any lists and must be destroyed using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) * btrfs_free_device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) dev = kzalloc(sizeof(*dev), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) if (!dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) * Preallocate a bio that's always going to be used for flushing device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) * barriers and matches the device lifespan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) if (!dev->flush_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) kfree(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) INIT_LIST_HEAD(&dev->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) INIT_LIST_HEAD(&dev->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) INIT_LIST_HEAD(&dev->post_commit_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) atomic_set(&dev->reada_in_flight, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) atomic_set(&dev->dev_stats_ccnt, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) btrfs_device_data_ordered_init(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) extent_io_tree_init(fs_info, &dev->alloc_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) IO_TREE_DEVICE_ALLOC_STATE, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) return dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) static noinline struct btrfs_fs_devices *find_fsid(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) const u8 *fsid, const u8 *metadata_fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) ASSERT(fsid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) /* Handle non-split brain cases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) if (metadata_fsid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) && memcmp(metadata_fsid, fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) BTRFS_FSID_SIZE) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) struct btrfs_super_block *disk_super)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * Handle scanned device having completed its fsid change but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) * belonging to a fs_devices that was created by first scanning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) * a device which didn't have its fsid/metadata_uuid changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) * at all and the CHANGING_FSID_V2 flag set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) if (fs_devices->fsid_change &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) memcmp(disk_super->metadata_uuid, fs_devices->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) BTRFS_FSID_SIZE) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) BTRFS_FSID_SIZE) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) * Handle scanned device having completed its fsid change but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * belonging to a fs_devices that was created by a device that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) * has an outdated pair of fsid/metadata_uuid and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) * CHANGING_FSID_V2 flag set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) if (fs_devices->fsid_change &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) memcmp(fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) BTRFS_FSID_SIZE) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) int flush, struct block_device **bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) struct btrfs_super_block **disk_super)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) *bdev = blkdev_get_by_path(device_path, flags, holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) if (IS_ERR(*bdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) ret = PTR_ERR(*bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) blkdev_put(*bdev, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) invalidate_bdev(*bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) *disk_super = btrfs_read_dev_super(*bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) if (IS_ERR(*disk_super)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) ret = PTR_ERR(*disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) blkdev_put(*bdev, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) *bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) static bool device_path_matched(const char *path, struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) int found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) found = strcmp(rcu_str_deref(device->name), path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) return found == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) * Search and remove all stale (devices which are not mounted) devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) * When both inputs are NULL, it will search and release all stale devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) * path: Optional. When provided will it release all unmounted devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) * matching this path only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) * skip_dev: Optional. Will skip this device when searching for the stale
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) * devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) * Return: 0 for success or if @path is NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) * -EBUSY if @path is a mounted device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) * -ENOENT if @path does not match any device in the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) static int btrfs_free_stale_devices(const char *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) struct btrfs_device *skip_device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) struct btrfs_device *device, *tmp_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) if (path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) list_for_each_entry_safe(device, tmp_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) if (skip_device && skip_device == device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) if (path && !device->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) if (path && !device_path_matched(path, device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) if (fs_devices->opened) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) /* for an already deleted device return 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) if (path && ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) /* delete the stale device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) list_del(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) if (fs_devices->num_devices == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) btrfs_sysfs_remove_fsid(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) list_del(&fs_devices->fs_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) * This is only used on mount, and we are protected from competing things
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) * messing with our fs_devices by the uuid_mutex, thus we do not need the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) * fs_devices->device_list_mutex here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) struct btrfs_device *device, fmode_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) void *holder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) if (device->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) if (!device->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) &bdev, &disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) devid = btrfs_stack_device_id(&disk_super->dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) if (devid != device->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) goto error_free_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) goto error_free_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) device->generation = btrfs_super_generation(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) if (btrfs_super_incompat_flags(disk_super) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) pr_err(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) "BTRFS: Invalid seeding and uuid-changed device detected\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) goto error_free_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) fs_devices->seeding = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) if (bdev_read_only(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) q = bdev_get_queue(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (!blk_queue_nonrot(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) fs_devices->rotating = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) device->bdev = bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) device->mode = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) fs_devices->open_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) device->devid != BTRFS_DEV_REPLACE_DEVID) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) fs_devices->rw_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) btrfs_release_disk_super(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) error_free_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) btrfs_release_disk_super(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) blkdev_put(bdev, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) * being created with a disk that has already completed its fsid change. Such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) * disk can belong to an fs which has its FSID changed or to one which doesn't.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) * Handle both cases here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) static struct btrfs_fs_devices *find_fsid_inprogress(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) struct btrfs_super_block *disk_super)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) BTRFS_FSID_SIZE) != 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) memcmp(fs_devices->metadata_uuid, disk_super->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) return find_fsid(disk_super->fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) static struct btrfs_fs_devices *find_fsid_changed(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) struct btrfs_super_block *disk_super)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) * Handles the case where scanned device is part of an fs that had
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) * multiple successful changes of FSID but curently device didn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) * observe it. Meaning our fsid will be different than theirs. We need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) * to handle two subcases :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) * 1 - The fs still continues to have different METADATA/FSID uuids.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) * 2 - The fs is switched back to its original FSID (METADATA/FSID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) * are equal).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) /* Changed UUIDs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) BTRFS_FSID_SIZE) != 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) BTRFS_FSID_SIZE) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) memcmp(fs_devices->fsid, disk_super->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) BTRFS_FSID_SIZE) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) /* Unchanged UUIDs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) BTRFS_FSID_SIZE) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) memcmp(fs_devices->fsid, disk_super->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) BTRFS_FSID_SIZE) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) static struct btrfs_fs_devices *find_fsid_reverted_metadata(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) struct btrfs_super_block *disk_super)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) * Handle the case where the scanned device is part of an fs whose last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) * metadata UUID change reverted it to the original FSID. At the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * time * fs_devices was first created by another constitutent device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * which didn't fully observe the operation. This results in an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * btrfs_fs_devices created with metadata/fsid different AND
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * fs_devices equal to the FSID of the disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) BTRFS_FSID_SIZE) != 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) memcmp(fs_devices->metadata_uuid, disk_super->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) BTRFS_FSID_SIZE) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) fs_devices->fsid_change)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * Add new device to list of registered devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * device pointer which was just added or updated when successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * error pointer when failed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) static noinline struct btrfs_device *device_list_add(const char *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) struct btrfs_super_block *disk_super,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) bool *new_device_added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) struct btrfs_fs_devices *fs_devices = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) struct rcu_string *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) u64 found_transid = btrfs_super_generation(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) if (fsid_change_in_progress) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (!has_metadata_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) fs_devices = find_fsid_inprogress(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) fs_devices = find_fsid_changed(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) } else if (has_metadata_uuid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) fs_devices = find_fsid_with_metadata_uuid(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) fs_devices = find_fsid_reverted_metadata(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) if (!fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) fs_devices = find_fsid(disk_super->fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) if (!fs_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) if (has_metadata_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) fs_devices = alloc_fs_devices(disk_super->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) disk_super->metadata_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (IS_ERR(fs_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) return ERR_CAST(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) fs_devices->fsid_change = fsid_change_in_progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) list_add(&fs_devices->fs_list, &fs_uuids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) device = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) device = btrfs_find_device(fs_devices, devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) disk_super->dev_item.uuid, NULL, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * If this disk has been pulled into an fs devices created by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * a device which had the CHANGING_FSID_V2 flag then replace the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) * metadata_uuid/fsid values of the fs_devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (fs_devices->fsid_change &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) found_transid > fs_devices->latest_generation) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) memcpy(fs_devices->fsid, disk_super->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) if (has_metadata_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) memcpy(fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) disk_super->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) memcpy(fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) disk_super->fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) fs_devices->fsid_change = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (!device) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (fs_devices->opened) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) return ERR_PTR(-EBUSY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) device = btrfs_alloc_device(NULL, &devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) disk_super->dev_item.uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) if (IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) /* we can safely leave the fs_devices entry around */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) name = rcu_string_strdup(path, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (!name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) rcu_assign_pointer(device->name, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) list_add_rcu(&device->dev_list, &fs_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) fs_devices->num_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) device->fs_devices = fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) *new_device_added = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) if (disk_super->label[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) pr_info(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) disk_super->label, devid, found_transid, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) current->comm, task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) pr_info(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) disk_super->fsid, devid, found_transid, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) current->comm, task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) } else if (!device->name || strcmp(device->name->str, path)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * When FS is already mounted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) * 1. If you are here and if the device->name is NULL that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) * means this device was missing at time of FS mount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) * 2. If you are here and if the device->name is different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * from 'path' that means either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) * a. The same device disappeared and reappeared with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * different name. or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) * b. The missing-disk-which-was-replaced, has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) * reappeared now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) * We must allow 1 and 2a above. But 2b would be a spurious
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * and unintentional.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) * Further in case of 1 and 2a above, the disk at 'path'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * would have missed some transaction when it was away and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * in case of 2a the stale bdev has to be updated as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) * 2b must not be allowed at all time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * For now, we do allow update to btrfs_fs_device through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * btrfs dev scan cli after FS has been mounted. We're still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * tracking a problem where systems fail mount by subvolume id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * when we reject replacement on a mounted FS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (!fs_devices->opened && found_transid < device->generation) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * That is if the FS is _not_ mounted and if you
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * are here, that means there is more than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * disk with same uuid and devid.We keep the one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * with larger generation number or the last-in if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) * generation are equal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) return ERR_PTR(-EEXIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * We are going to replace the device path for a given devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * make sure it's the same device if the device is mounted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if (device->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) struct block_device *path_bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) path_bdev = lookup_bdev(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (IS_ERR(path_bdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) return ERR_CAST(path_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (device->bdev != path_bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) bdput(path_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) * device->fs_info may not be reliable here, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * pass in a NULL instead. This avoids a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * possible use-after-free when the fs_info and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * fs_info->sb are already torn down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) btrfs_warn_in_rcu(NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) path, devid, found_transid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) current->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return ERR_PTR(-EEXIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) bdput(path_bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) btrfs_info_in_rcu(device->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) "devid %llu device path %s changed to %s scanned by %s (%d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) devid, rcu_str_deref(device->name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) path, current->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) name = rcu_string_strdup(path, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (!name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) rcu_string_free(device->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) rcu_assign_pointer(device->name, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) fs_devices->missing_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * Unmount does not free the btrfs_device struct but would zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * generation along with most of the other members. So just update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) * it back. We need it to pick the disk with largest generation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * (as above).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) if (!fs_devices->opened) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) device->generation = found_transid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) fs_devices->latest_generation = max_t(u64, found_transid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) fs_devices->latest_generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) fs_devices->total_devices = btrfs_super_num_devices(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) struct btrfs_device *orig_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) fs_devices = alloc_fs_devices(orig->fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) if (IS_ERR(fs_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) fs_devices->total_devices = orig->total_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) list_for_each_entry(orig_dev, &orig->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) struct rcu_string *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) device = btrfs_alloc_device(NULL, &orig_dev->devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) orig_dev->uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) ret = PTR_ERR(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * This is ok to do without rcu read locked because we hold the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * uuid mutex so nothing we touch in here is going to disappear.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) if (orig_dev->name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) name = rcu_string_strdup(orig_dev->name->str,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (!name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) rcu_assign_pointer(device->name, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) list_add(&device->dev_list, &fs_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) device->fs_devices = fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) fs_devices->num_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) int step, struct btrfs_device **latest_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) struct btrfs_device *device, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) /* This is the initialized path, it is safe to release the devices. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) !test_bit(BTRFS_DEV_STATE_MISSING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) (!*latest_dev ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) device->generation > (*latest_dev)->generation)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) *latest_dev = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * in btrfs_init_dev_replace() so just continue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (device->devid == BTRFS_DEV_REPLACE_DEVID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (device->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) blkdev_put(device->bdev, device->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) device->bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) fs_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) list_del_init(&device->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) fs_devices->rw_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) list_del_init(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * After we have read the system tree and know devids belonging to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * filesystem, remove the device which does not belong there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) struct btrfs_device *latest_dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) struct btrfs_fs_devices *seed_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) __btrfs_free_extra_devids(fs_devices, step, &latest_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) __btrfs_free_extra_devids(seed_dev, step, &latest_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) fs_devices->latest_bdev = latest_dev->bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) static void btrfs_close_bdev(struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) if (!device->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) sync_blockdev(device->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) invalidate_bdev(device->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) blkdev_put(device->bdev, device->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) static void btrfs_close_one_device(struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) struct btrfs_fs_devices *fs_devices = device->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) device->devid != BTRFS_DEV_REPLACE_DEVID) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) list_del_init(&device->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) fs_devices->rw_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) if (device->devid == BTRFS_DEV_REPLACE_DEVID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) fs_devices->missing_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) btrfs_close_bdev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) if (device->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) fs_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) device->bdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) device->fs_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) atomic_set(&device->dev_stats_ccnt, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) extent_io_tree_release(&device->alloc_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * Reset the flush error record. We might have a transient flush error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * in this mount, and if so we aborted the current transaction and set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * the fs to an error state, guaranteeing no super blocks can be further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * committed. However that error might be transient and if we unmount the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * filesystem and mount it again, we should allow the mount to succeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * (btrfs_check_rw_degradable() should not fail) - if after mounting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) * filesystem again we still get flush errors, then we will again abort
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) * any transaction and set the error state, guaranteeing no commits of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) * unsafe super blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) device->last_flush_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /* Verify the device is back in a pristine state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) ASSERT(list_empty(&device->dev_alloc_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) ASSERT(list_empty(&device->post_commit_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) ASSERT(atomic_read(&device->reada_in_flight) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static void close_fs_devices(struct btrfs_fs_devices *fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) struct btrfs_device *device, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (--fs_devices->opened > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) btrfs_close_one_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) WARN_ON(fs_devices->open_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) WARN_ON(fs_devices->rw_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) fs_devices->opened = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) fs_devices->seeding = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) fs_devices->fs_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) struct btrfs_fs_devices *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) close_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) if (!fs_devices->opened)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) list_splice_init(&fs_devices->seed_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) close_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) list_del(&fs_devices->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) fmode_t flags, void *holder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) struct btrfs_device *latest_dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) struct btrfs_device *tmp_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) flags |= FMODE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) ret = btrfs_open_one_device(fs_devices, device, flags, holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) if (ret == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) (!latest_dev || device->generation > latest_dev->generation)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) latest_dev = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) } else if (ret == -ENODATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) list_del(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (fs_devices->open_devices == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) fs_devices->opened = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) fs_devices->latest_bdev = latest_dev->bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) fs_devices->total_rw_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) struct btrfs_device *dev1, *dev2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) dev1 = list_entry(a, struct btrfs_device, dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) dev2 = list_entry(b, struct btrfs_device, dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) if (dev1->devid < dev2->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) else if (dev1->devid > dev2->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) fmode_t flags, void *holder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * The device_list_mutex cannot be taken here in case opening the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * underlying device takes further locks like bd_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * We also don't need the lock here as this is called during mount and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * exclusion is provided by uuid_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (fs_devices->opened) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) fs_devices->opened++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) list_sort(NULL, &fs_devices->devices, devid_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) ret = open_fs_devices(fs_devices, flags, holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
/*
 * Drop the reference on the page that backs @super. Any address inside that
 * page works, since the page is looked up with virt_to_page().
 */
void btrfs_release_disk_super(struct btrfs_super_block *super)
{
	put_page(virt_to_page(super));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) u64 bytenr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) pgoff_t index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) /* make sure our super fits in the device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) /* make sure our super fits in the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) if (sizeof(*disk_super) > PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) /* make sure our super doesn't straddle pages on disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) index = bytenr >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) /* pull in the page with our super */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (IS_ERR(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) return ERR_CAST(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) p = page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) /* align our pointer to the offset of the super block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) disk_super = p + offset_in_page(bytenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (btrfs_super_bytenr(disk_super) != bytenr ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) btrfs_release_disk_super(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) return disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) int btrfs_forget_devices(const char *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) * Look for a btrfs signature on a device. This may be called out of the mount path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) * and we are not allowed to call set_blocksize during the scan. The superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * is read via pagecache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) void *holder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) bool new_device_added = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) struct btrfs_device *device = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) u64 bytenr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) * we would like to check all the supers, but that would make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) * a btrfs mount succeed after a mkfs from a different FS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) * So, we need to add a special mount option to scan for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) * later supers, using BTRFS_SUPER_MIRROR_MAX instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) bytenr = btrfs_sb_offset(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) flags |= FMODE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) bdev = blkdev_get_by_path(path, flags, holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) if (IS_ERR(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) return ERR_CAST(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) disk_super = btrfs_read_disk_super(bdev, bytenr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) if (IS_ERR(disk_super)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) device = ERR_CAST(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) goto error_bdev_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) device = device_list_add(path, disk_super, &new_device_added);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) if (!IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (new_device_added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) btrfs_free_stale_devices(path, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) btrfs_release_disk_super(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) error_bdev_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) blkdev_put(bdev, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * Try to find a chunk that intersects [start, start + len] range and when one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * such is found, record the end of it in *start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) u64 physical_start, physical_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) lockdep_assert_held(&device->fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) if (!find_first_extent_bit(&device->alloc_state, *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) &physical_start, &physical_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) CHUNK_ALLOCATED, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) if (in_range(physical_start, *start, len) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) in_range(*start, physical_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) physical_end - physical_start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) *start = physical_end + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) switch (device->fs_devices->chunk_alloc_policy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) case BTRFS_CHUNK_ALLOC_REGULAR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * We don't want to overwrite the superblock on the drive nor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * any area used by the boot loader (grub for example), so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * make sure to start at an offset of at least 1MB.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) return max_t(u64, start, SZ_1M);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * dev_extent_hole_check - check if specified hole is suitable for allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * @device: the device which we have the hole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) * @hole_start: starting position of the hole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * @hole_size: the size of the hole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * @num_bytes: the size of the free space that we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * This function may modify @hole_start and @hole_end to reflect the suitable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * position for allocation. Returns 1 if hole position is updated, 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) u64 *hole_size, u64 num_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) bool changed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) u64 hole_end = *hole_start + *hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * Check before we set max_hole_start, otherwise we could end up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * sending back this offset anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) if (contains_pending_extent(device, hole_start, *hole_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) if (hole_end >= *hole_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) *hole_size = hole_end - *hole_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) *hole_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) changed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) switch (device->fs_devices->chunk_alloc_policy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) case BTRFS_CHUNK_ALLOC_REGULAR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) /* No extra check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) return changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) * find_free_dev_extent_start - find free space in the specified device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) * @device: the device which we search the free space in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) * @num_bytes: the size of the free space that we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * @search_start: the position from which to begin the search
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * @start: store the start of the free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) * @len: the size of the free space. that we find, or the size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * of the max free space if we don't find suitable free space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) * this uses a pretty simple search, the expectation is that it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) * called very infrequently and that a given device has a small number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * of extents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) * @start is used to store the start of the free space if we find. But if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * don't find suitable free space, it will be used to store the start position
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * of the max free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * @len is used to store the size of the free space that we find.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) * But if we don't find suitable free space, it is used to store the size of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * the max free space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * NOTE: This function will search *commit* root of device tree, and does extra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * check to ensure dev extents are not double allocated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * This makes the function safe to allocate dev extents but may not report
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * correct usable device space, as device extent freed in current transaction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * is not reported as avaiable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) static int find_free_dev_extent_start(struct btrfs_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) u64 num_bytes, u64 search_start, u64 *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) u64 *len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) struct btrfs_dev_extent *dev_extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) u64 hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) u64 max_hole_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) u64 max_hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) u64 extent_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) u64 search_end = device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) struct extent_buffer *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) search_start = dev_extent_search_start(device, search_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) max_hole_start = search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) max_hole_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) if (search_start >= search_end ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) path->reada = READA_FORWARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) path->search_commit_root = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) path->skip_locking = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) key.objectid = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) key.offset = search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) ret = btrfs_previous_item(root, path, key.objectid, key.type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) l = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (slot >= btrfs_header_nritems(l)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) btrfs_item_key_to_cpu(l, &key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) if (key.objectid < device->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) if (key.objectid > device->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) if (key.type != BTRFS_DEV_EXTENT_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) if (key.offset > search_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) hole_size = key.offset - search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) dev_extent_hole_check(device, &search_start, &hole_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) num_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) if (hole_size > max_hole_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) max_hole_start = search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) max_hole_size = hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * If this free space is greater than which we need,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) * it must be the max free space that we have found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) * until now, so max_hole_start must point to the start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) * of this free space and the length of this free space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) * is stored in max_hole_size. Thus, we return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) * max_hole_start and max_hole_size and go back to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) * caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) if (hole_size >= num_bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) extent_end = key.offset + btrfs_dev_extent_length(l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) if (extent_end > search_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) search_start = extent_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) path->slots[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * At this point, search_start should be the end of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * allocated dev extents, and when shrinking the device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * search_end may be smaller than search_start.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (search_end > search_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) hole_size = search_end - search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) if (dev_extent_hole_check(device, &search_start, &hole_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) num_bytes)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (hole_size > max_hole_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) max_hole_start = search_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) max_hole_size = hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /* See above. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) if (max_hole_size < num_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) *start = max_hole_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) *len = max_hole_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) u64 *start, u64 *len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) /* FIXME use last free of some kind */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) return find_free_dev_extent_start(device, num_bytes, 0, start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) struct btrfs_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) u64 start, u64 *dev_extent_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) struct extent_buffer *leaf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) struct btrfs_dev_extent *extent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) key.objectid = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) key.offset = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) ret = btrfs_previous_item(root, path, key.objectid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) BTRFS_DEV_EXTENT_KEY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) extent = btrfs_item_ptr(leaf, path->slots[0],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) BUG_ON(found_key.offset > start || found_key.offset +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) btrfs_dev_extent_length(leaf, extent) < start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) key = found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) } else if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) extent = btrfs_item_ptr(leaf, path->slots[0],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) ret = btrfs_del_item(trans, root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) btrfs_handle_fs_error(fs_info, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) "Failed to remove dev extent item");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) struct btrfs_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) u64 chunk_offset, u64 start, u64 num_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct btrfs_dev_extent *extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) key.objectid = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) key.offset = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) ret = btrfs_insert_empty_item(trans, root, path, &key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) sizeof(*extent));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) extent = btrfs_item_ptr(leaf, path->slots[0],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) btrfs_set_dev_extent_chunk_tree(leaf, extent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) BTRFS_CHUNK_TREE_OBJECTID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) btrfs_set_dev_extent_chunk_objectid(leaf, extent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) BTRFS_FIRST_CHUNK_TREE_OBJECTID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) btrfs_set_dev_extent_length(leaf, extent, num_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) btrfs_mark_buffer_dirty(leaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) struct extent_map_tree *em_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) u64 ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) em_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) read_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) n = rb_last(&em_tree->map.rb_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) if (n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) em = rb_entry(n, struct extent_map, rb_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) ret = em->start + em->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) read_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) u64 *devid_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) key.type = BTRFS_DEV_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) /* Corruption */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) ret = btrfs_previous_item(fs_info->chunk_root, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) BTRFS_DEV_ITEMS_OBJECTID,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) BTRFS_DEV_ITEM_KEY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) *devid_ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) btrfs_item_key_to_cpu(path->nodes[0], &found_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) *devid_ret = found_key.offset + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * the device information is stored in the chunk root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) * the btrfs_device struct should be fully filled in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) struct btrfs_dev_item *dev_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) unsigned long ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) key.type = BTRFS_DEV_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) key.offset = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) &key, sizeof(*dev_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) btrfs_set_device_id(leaf, dev_item, device->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) btrfs_set_device_generation(leaf, dev_item, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) btrfs_set_device_type(leaf, dev_item, device->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) btrfs_set_device_io_align(leaf, dev_item, device->io_align);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) btrfs_set_device_io_width(leaf, dev_item, device->io_width);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) btrfs_set_device_total_bytes(leaf, dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) btrfs_device_get_disk_total_bytes(device));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) btrfs_set_device_bytes_used(leaf, dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) btrfs_device_get_bytes_used(device));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) btrfs_set_device_group(leaf, dev_item, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) btrfs_set_device_seek_speed(leaf, dev_item, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) btrfs_set_device_bandwidth(leaf, dev_item, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) btrfs_set_device_start_offset(leaf, dev_item, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) ptr = btrfs_device_uuid(dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) ptr = btrfs_device_fsid(dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) ptr, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) btrfs_mark_buffer_dirty(leaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * Function to update ctime/mtime for a given device path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * Mainly used for ctime/mtime based probe like libblkid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) * We don't care about errors here, this is just to be kind to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) static void update_dev_time(const char *device_path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) struct path path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) struct timespec64 now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) now = current_time(d_inode(path.dentry));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) path_put(&path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) static int btrfs_rm_dev_item(struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) struct btrfs_root *root = device->fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) trans = btrfs_start_transaction(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) key.type = BTRFS_DEV_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) key.offset = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) ret = btrfs_del_item(trans, root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) * Verify that @num_devices satisfies the RAID profile constraints in the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) * filesystem. It's up to the caller to adjust that number regarding eg. device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) * replace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) u64 num_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) u64 all_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) seq = read_seqbegin(&fs_info->profiles_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) all_avail = fs_info->avail_data_alloc_bits |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) fs_info->avail_system_alloc_bits |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) fs_info->avail_metadata_alloc_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) } while (read_seqretry(&fs_info->profiles_lock, seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) if (!(all_avail & btrfs_raid_array[i].bg_flag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) if (num_devices < btrfs_raid_array[i].devs_min) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) int ret = btrfs_raid_array[i].mindev_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) static struct btrfs_device * btrfs_find_next_active_device(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) struct btrfs_device *next_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) if (next_device != device &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) && next_device->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) return next_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) * Helper function to check if the given device is part of s_bdev / latest_bdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * and replace it with the provided or the next active device, in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * where this function called, there should be always be another device (or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) * this_dev) which is active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) struct btrfs_device *next_device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (!next_device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) next_device = btrfs_find_next_active_device(fs_info->fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) ASSERT(next_device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) if (fs_info->sb->s_bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) (fs_info->sb->s_bdev == device->bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) fs_info->sb->s_bdev = next_device->bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) if (fs_info->fs_devices->latest_bdev == device->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) fs_info->fs_devices->latest_bdev = next_device->bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) * Return btrfs_fs_devices::num_devices excluding the device that's being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) * currently replaced.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) u64 num_devices = fs_info->fs_devices->num_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) down_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) ASSERT(num_devices > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) return num_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) struct block_device *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) const char *device_path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) int copy_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) if (!bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) disk_super = btrfs_read_dev_one_super(bdev, copy_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (IS_ERR(disk_super))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) memset(&disk_super->magic, 0, sizeof(disk_super->magic));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) page = virt_to_page(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) /* write_on_page() unlocks the page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) ret = write_one_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) "error clearing superblock number %d (%d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) copy_num, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) btrfs_release_disk_super(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) /* Notify udev that device has changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) /* Update ctime/mtime for device path for libblkid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) update_dev_time(device_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) u64 devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) struct btrfs_fs_devices *cur_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) u64 num_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) * The device list in fs_devices is accessed without locks (neither
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) * uuid_mutex nor device_list_mutex) as it won't change on a mounted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) * filesystem and another device rm cannot run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) num_devices = btrfs_num_devices(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) device = btrfs_find_device_by_devspec(fs_info, devid, device_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) if (IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) if (PTR_ERR(device) == -ENOENT &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) device_path && strcmp(device_path, "missing") == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) ret = PTR_ERR(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) if (btrfs_pinned_by_swapfile(fs_info, device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) btrfs_warn_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) "cannot remove device %s (devid %llu) due to active swapfile",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) rcu_str_deref(device->name), device->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) ret = -ETXTBSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) ret = BTRFS_ERROR_DEV_TGT_REPLACE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) fs_info->fs_devices->rw_devices == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) list_del_init(&device->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) device->fs_devices->rw_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) ret = btrfs_shrink_device(device, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) btrfs_reada_remove_dev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) goto error_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) * TODO: the superblock still includes this device in its num_devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) * counter although write_all_supers() is not locked out. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) * could give a filesystem state which requires a degraded mount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) ret = btrfs_rm_dev_item(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) goto error_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) btrfs_scrub_cancel_dev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) * the device list mutex makes sure that we don't change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) * the device list while someone else is writing out all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * the device supers. Whoever is writing all supers, should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) * lock the device list mutex before getting the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) * devices in the super block (super_copy). Conversely,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) * whoever updates the number of devices in the super block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) * (super_copy) should hold the device list mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) * In normal cases the cur_devices == fs_devices. But in case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) * of deleting a seed device, the cur_devices should point to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) * its own fs_devices listed under the fs_devices->seed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) cur_devices = device->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) list_del_rcu(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) cur_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) cur_devices->total_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) /* Update total_devices of the parent fs_devices if it's seed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) if (cur_devices != fs_devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) fs_devices->total_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) cur_devices->missing_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) btrfs_assign_next_active_device(device, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) if (device->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) cur_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /* remove sysfs entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) btrfs_sysfs_remove_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) * at this point, the device is zero sized and detached from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) * the devices list. All that's left is to zero out the old
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) * supers and free the device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) btrfs_scratch_superblocks(fs_info, device->bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) device->name->str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) btrfs_close_bdev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) synchronize_rcu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) if (cur_devices->open_devices == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) list_del_init(&cur_devices->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) close_fs_devices(cur_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) free_fs_devices(cur_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) error_undo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) btrfs_reada_undo_remove_dev(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) list_add(&device->dev_alloc_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) &fs_devices->alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) device->fs_devices->rw_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * in case of fs with no seed, srcdev->fs_devices will point
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * to fs_devices of fs_info. However when the dev being replaced is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * a seed dev it will point to the seed's local fs_devices. In short
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * srcdev will have its correct fs_devices in both the cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) fs_devices = srcdev->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) list_del_rcu(&srcdev->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) list_del(&srcdev->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) fs_devices->missing_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) fs_devices->rw_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) if (srcdev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) fs_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) btrfs_close_bdev(srcdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) synchronize_rcu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) btrfs_free_device(srcdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) /* if this is no devs we rather delete the fs_devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) if (!fs_devices->num_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) * On a mounted FS, num_devices can't be zero unless it's a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) * seed. In case of a seed device being replaced, the replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) * target added to the sprout FS, so there will be no more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) * device left under the seed FS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) ASSERT(fs_devices->seeding);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) list_del_init(&fs_devices->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) close_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) btrfs_sysfs_remove_device(tgtdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) if (tgtdev->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) fs_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) btrfs_assign_next_active_device(tgtdev, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) list_del_rcu(&tgtdev->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) * The update_dev_time() with in btrfs_scratch_superblocks()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) * may lead to a call to btrfs_show_devname() which will try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) * to hold device_list_mutex. And here this device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * is already out of device list, so we don't have to hold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * the device_list_mutex lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) tgtdev->name->str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) btrfs_close_bdev(tgtdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) synchronize_rcu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) btrfs_free_device(tgtdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) static struct btrfs_device *btrfs_find_device_by_path(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) struct btrfs_fs_info *fs_info, const char *device_path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) struct btrfs_super_block *disk_super;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) u8 *dev_uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) fs_info->bdev_holder, 0, &bdev, &disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) devid = btrfs_stack_device_id(&disk_super->dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) dev_uuid = disk_super->dev_item.uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) if (btrfs_fs_incompat(fs_info, METADATA_UUID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) disk_super->metadata_uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) disk_super->fsid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) btrfs_release_disk_super(disk_super);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) device = ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) blkdev_put(bdev, FMODE_READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) * Lookup a device given by device id, or the path if the id is 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) struct btrfs_device *btrfs_find_device_by_devspec(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) struct btrfs_fs_info *fs_info, u64 devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) const char *device_path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) if (devid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) if (!device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) return ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (!device_path || !device_path[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) if (strcmp(device_path, "missing") == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) /* Find first missing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) list_for_each_entry(device, &fs_info->fs_devices->devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) &device->dev_state) && !device->bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) return ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return btrfs_find_device_by_path(fs_info, device_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * does all the dirty work required for changing file system's UUID.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) struct btrfs_fs_devices *old_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) struct btrfs_fs_devices *seed_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) struct btrfs_super_block *disk_super = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) u64 super_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) if (!fs_devices->seeding)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) * Private copy of the seed devices, anchored at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) * fs_info->fs_devices->seed_list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) seed_devices = alloc_fs_devices(NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) if (IS_ERR(seed_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) return PTR_ERR(seed_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * It's necessary to retain a copy of the original seed fs_devices in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) * fs_uuids so that filesystems which have been seeded can successfully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) * reference the seed device from open_seed_devices. This also supports
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) * multiple fs seed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) old_devices = clone_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) if (IS_ERR(old_devices)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) kfree(seed_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) return PTR_ERR(old_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) list_add(&old_devices->fs_list, &fs_uuids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) seed_devices->opened = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) INIT_LIST_HEAD(&seed_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) INIT_LIST_HEAD(&seed_devices->alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) mutex_init(&seed_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) synchronize_rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) list_for_each_entry(device, &seed_devices->devices, dev_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) device->fs_devices = seed_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) fs_devices->seeding = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) fs_devices->num_devices = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) fs_devices->open_devices = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) fs_devices->missing_devices = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) fs_devices->rotating = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) list_add(&seed_devices->seed_list, &fs_devices->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) generate_random_uuid(fs_devices->fsid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) super_flags = btrfs_super_flags(disk_super) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) ~BTRFS_SUPER_FLAG_SEEDING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) btrfs_set_super_flags(disk_super, super_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) * Store the expected generation for seed devices in device items.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) struct btrfs_root *root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) struct btrfs_dev_item *dev_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) u8 fs_uuid[BTRFS_FSID_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) u8 dev_uuid[BTRFS_UUID_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) key.type = BTRFS_DEV_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) next_slot:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) if (path->slots[0] >= btrfs_header_nritems(leaf)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) key.type != BTRFS_DEV_ITEM_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) dev_item = btrfs_item_ptr(leaf, path->slots[0],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) struct btrfs_dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) devid = btrfs_device_id(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) fs_uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) BUG_ON(!device); /* Logic error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) if (device->fs_devices->seeding) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) btrfs_set_device_generation(leaf, dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) device->generation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) btrfs_mark_buffer_dirty(leaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) path->slots[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) goto next_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) struct super_block *sb = fs_info->sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) struct rcu_string *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) u64 orig_super_total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) u64 orig_super_num_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) int seeding_dev = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) bool locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) if (sb_rdonly(sb) && !fs_devices->seeding)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) fs_info->bdev_holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) if (IS_ERR(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) return PTR_ERR(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) if (fs_devices->seeding) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) seeding_dev = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) down_write(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) sync_blockdev(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) if (device->bdev == bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) ret = -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) device = btrfs_alloc_device(fs_info, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) if (IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) /* we can safely leave the fs_devices entry around */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) ret = PTR_ERR(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) name = rcu_string_strdup(device_path, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) if (!name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) goto error_free_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) rcu_assign_pointer(device->name, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) trans = btrfs_start_transaction(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) goto error_free_device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) q = bdev_get_queue(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) device->generation = trans->transid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) device->io_width = fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) device->io_align = fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) device->sector_size = fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) device->total_bytes = round_down(i_size_read(bdev->bd_inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) device->disk_total_bytes = device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) device->commit_total_bytes = device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) device->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) device->bdev = bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) device->mode = FMODE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) device->dev_stats_valid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) if (seeding_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) sb->s_flags &= ~SB_RDONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) ret = btrfs_prepare_sprout(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) goto error_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) device->fs_devices = fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) list_add_rcu(&device->dev_list, &fs_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) fs_devices->num_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) fs_devices->open_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) fs_devices->rw_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) fs_devices->total_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) fs_devices->total_rw_bytes += device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) if (!blk_queue_nonrot(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) fs_devices->rotating = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) btrfs_set_super_total_bytes(fs_info->super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) round_down(orig_super_total_bytes + device->total_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) fs_info->sectorsize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) btrfs_set_super_num_devices(fs_info->super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) orig_super_num_devices + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) * we've got more storage, clear any full flags on the space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) * infos
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) btrfs_clear_space_info_full(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) /* Add sysfs device entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) btrfs_sysfs_add_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (seeding_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) ret = init_first_rw_device(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) goto error_sysfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) ret = btrfs_add_dev_item(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) goto error_sysfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) if (seeding_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) ret = btrfs_finish_sprout(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) goto error_sysfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) * fs_devices now represents the newly sprouted filesystem and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) * its fsid has been changed by btrfs_prepare_sprout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) btrfs_sysfs_update_sprout_fsid(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) if (seeding_dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) up_write(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) if (ret) /* transaction commit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) ret = btrfs_relocate_sys_chunks(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) btrfs_handle_fs_error(fs_info, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) trans = btrfs_attach_transaction(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) if (PTR_ERR(trans) == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) trans = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) goto error_sysfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) * Now that we have written a new super block to this device, check all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) * other fs_devices list if device_path alienates any other scanned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) * device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) * We can ignore the return value as it typically returns -EINVAL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) * only succeeds if the device was an alien.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) btrfs_forget_devices(device_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) /* Update ctime/mtime for blkid or udev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) update_dev_time(device_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) error_sysfs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) btrfs_sysfs_remove_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) mutex_lock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) list_del_rcu(&device->dev_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) list_del(&device->dev_alloc_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) fs_info->fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) fs_info->fs_devices->open_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) fs_info->fs_devices->rw_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) fs_info->fs_devices->total_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) btrfs_set_super_total_bytes(fs_info->super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) orig_super_total_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) btrfs_set_super_num_devices(fs_info->super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) orig_super_num_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) error_trans:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) if (seeding_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) sb->s_flags |= SB_RDONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) if (trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) error_free_device:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) btrfs_free_device(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) blkdev_put(bdev, FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) up_write(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) struct btrfs_root *root = device->fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) struct btrfs_dev_item *dev_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) key.type = BTRFS_DEV_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) key.offset = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) btrfs_set_device_id(leaf, dev_item, device->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) btrfs_set_device_type(leaf, dev_item, device->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) btrfs_set_device_io_align(leaf, dev_item, device->io_align);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) btrfs_set_device_io_width(leaf, dev_item, device->io_width);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) btrfs_set_device_total_bytes(leaf, dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) btrfs_device_get_disk_total_bytes(device));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) btrfs_set_device_bytes_used(leaf, dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) btrfs_device_get_bytes_used(device));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) btrfs_mark_buffer_dirty(leaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) int btrfs_grow_device(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) struct btrfs_device *device, u64 new_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) struct btrfs_super_block *super_copy = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) u64 old_total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) u64 diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) new_size = round_down(new_size, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) old_total = btrfs_super_total_bytes(super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) if (new_size <= device->total_bytes ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) btrfs_set_super_total_bytes(super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) round_down(old_total + diff, fs_info->sectorsize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) device->fs_devices->total_rw_bytes += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) btrfs_device_set_total_bytes(device, new_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) btrfs_device_set_disk_total_bytes(device, new_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) btrfs_clear_space_info_full(device->fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) if (list_empty(&device->post_commit_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) list_add_tail(&device->post_commit_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) &trans->transaction->dev_update_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) return btrfs_update_device(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) struct btrfs_root *root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) key.offset = chunk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) key.type = BTRFS_CHUNK_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) else if (ret > 0) { /* Logic error or corruption */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) btrfs_handle_fs_error(fs_info, -ENOENT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) "Failed lookup while freeing chunk.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) ret = btrfs_del_item(trans, root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) btrfs_handle_fs_error(fs_info, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) "Failed to delete chunk item.");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) struct btrfs_super_block *super_copy = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) struct btrfs_disk_key *disk_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) u8 *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) u32 num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) u32 array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) u32 len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) u32 cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) array_size = btrfs_super_sys_array_size(super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) ptr = super_copy->sys_chunk_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) cur = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) while (cur < array_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) disk_key = (struct btrfs_disk_key *)ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) btrfs_disk_key_to_cpu(&key, disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) len = sizeof(*disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) if (key.type == BTRFS_CHUNK_ITEM_KEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) chunk = (struct btrfs_chunk *)(ptr + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) num_stripes = btrfs_stack_chunk_num_stripes(chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) len += btrfs_chunk_item_size(num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) key.offset == chunk_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) memmove(ptr, ptr + len, array_size - (cur + len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) array_size -= len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) btrfs_set_super_sys_array_size(super_copy, array_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) ptr += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) cur += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) * btrfs_get_chunk_map() - Find the mapping containing the given logical extent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * @logical: Logical block offset in bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) * @length: Length of extent in bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) * Return: Chunk mapping or ERR_PTR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) u64 logical, u64 length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) struct extent_map_tree *em_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) em_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) read_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) em = lookup_extent_mapping(em_tree, logical, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) read_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) if (!em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) btrfs_crit(fs_info, "unable to find logical %llu length %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) logical, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) if (em->start > logical || em->start + em->len < logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) btrfs_crit(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) logical, length, em->start, em->start + em->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) /* callers are responsible for dropping em's ref. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) return em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) u64 dev_extent_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) int i, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) if (IS_ERR(em)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) * This is a logic error, but we don't want to just rely on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) * user having built with ASSERT enabled, so if ASSERT doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) * do anything we still error out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) return PTR_ERR(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) check_system_chunk(trans, map->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) * Take the device list mutex to prevent races with the final phase of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) * a device replace operation that replaces the device object associated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) struct btrfs_device *device = map->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) ret = btrfs_free_dev_extent(trans, device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) map->stripes[i].physical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) &dev_extent_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) if (device->bytes_used > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) btrfs_device_set_bytes_used(device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) device->bytes_used - dev_extent_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) btrfs_clear_space_info_full(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) ret = btrfs_update_device(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) ret = btrfs_free_chunk(trans, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) ret = btrfs_remove_block_group(trans, chunk_offset, em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) /* once for us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) struct btrfs_root *root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) struct btrfs_block_group *block_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) * Prevent races with automatic removal of unused block groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) * After we relocate and before we remove the chunk with offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) * chunk_offset, automatic removal of the block group can kick in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) * resulting in a failure when calling btrfs_remove_chunk() below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) * Make sure to acquire this mutex before doing a tree search (dev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) * or chunk trees) to find chunks. Otherwise the cleaner kthread might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) * we release the path used to search the chunk/dev tree and before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) * the current task acquires this mutex and calls us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) /* step one, relocate all the extents inside this chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) btrfs_scrub_pause(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) ret = btrfs_relocate_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) btrfs_scrub_continue(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) if (!block_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) btrfs_put_block_group(block_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) trans = btrfs_start_trans_remove_block_group(root->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) btrfs_handle_fs_error(root->fs_info, ret, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) * step two, delete the device extents and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) * chunk tree entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) ret = btrfs_remove_chunk(trans, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) struct btrfs_root *chunk_root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) u64 chunk_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) bool retried = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) int failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) key.type = BTRFS_CHUNK_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) mutex_lock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) BUG_ON(ret == 0); /* Corruption */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) ret = btrfs_previous_item(chunk_root, path, key.objectid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) key.type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) chunk = btrfs_item_ptr(leaf, path->slots[0],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) struct btrfs_chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) chunk_type = btrfs_chunk_type(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) ret = btrfs_relocate_chunk(fs_info, found_key.offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) if (ret == -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) if (found_key.offset == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) key.offset = found_key.offset - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) if (failed && !retried) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) retried = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) } else if (WARN_ON(failed && retried)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) * return 1 : allocate a data chunk successfully,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) * return <0: errors during allocating a data chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) * return 0 : no need to allocate a data chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) struct btrfs_block_group *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) u64 bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) u64 chunk_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) cache = btrfs_lookup_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) ASSERT(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) chunk_type = cache->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) spin_lock(&fs_info->data_sinfo->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) bytes_used = fs_info->data_sinfo->bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) spin_unlock(&fs_info->data_sinfo->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) if (!bytes_used) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) trans = btrfs_join_transaction(fs_info->tree_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) if (IS_ERR(trans))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) static int insert_balance_item(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) struct btrfs_balance_control *bctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) struct btrfs_root *root = fs_info->tree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) struct btrfs_balance_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) struct btrfs_disk_balance_args disk_bargs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) int ret, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) trans = btrfs_start_transaction(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) key.objectid = BTRFS_BALANCE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) key.type = BTRFS_TEMPORARY_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) ret = btrfs_insert_empty_item(trans, root, path, &key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) sizeof(*item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) btrfs_set_balance_data(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) btrfs_set_balance_meta(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) btrfs_set_balance_sys(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) btrfs_set_balance_flags(leaf, item, bctl->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) btrfs_mark_buffer_dirty(leaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) err = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) if (err && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) static int del_balance_item(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) struct btrfs_root *root = fs_info->tree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) int ret, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) key.objectid = BTRFS_BALANCE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) key.type = BTRFS_TEMPORARY_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) ret = btrfs_del_item(trans, root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) err = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) if (err && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) * This is a heuristic used to reduce the number of chunks balanced on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) * resume after balance was interrupted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) static void update_balance_args(struct btrfs_balance_control *bctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) * Turn on soft mode for chunk types that were being converted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) * Turn on usage filter if is not already used. The idea is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) * that chunks that we have already balanced should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) * reasonably full. Don't do it for chunks that are being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) * converted - that will keep us from relocating unconverted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) * (albeit full) chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) bctl->data.usage = 90;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) bctl->sys.usage = 90;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) bctl->meta.usage = 90;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) * Clear the balance status in fs_info and delete the balance item from disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) static void reset_balance_state(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) struct btrfs_balance_control *bctl = fs_info->balance_ctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) BUG_ON(!fs_info->balance_ctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) fs_info->balance_ctl = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) kfree(bctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) ret = del_balance_item(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) btrfs_handle_fs_error(fs_info, ret, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) * Balance filters. Return 1 if chunk should be filtered out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) * (should not be balanced).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) static int chunk_profiles_filter(u64 chunk_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) chunk_type = chunk_to_extended(chunk_type) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) BTRFS_EXTENDED_PROFILE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) if (bargs->profiles & chunk_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) struct btrfs_block_group *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) u64 chunk_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) u64 user_thresh_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) u64 user_thresh_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) cache = btrfs_lookup_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) chunk_used = cache->used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) if (bargs->usage_min == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) user_thresh_min = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) user_thresh_min = div_factor_fine(cache->length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) bargs->usage_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) if (bargs->usage_max == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) user_thresh_max = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) else if (bargs->usage_max > 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) user_thresh_max = cache->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) user_thresh_max = div_factor_fine(cache->length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) bargs->usage_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) u64 chunk_offset, struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) struct btrfs_block_group *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) u64 chunk_used, user_thresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) cache = btrfs_lookup_block_group(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) chunk_used = cache->used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) if (bargs->usage_min == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) user_thresh = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) else if (bargs->usage > 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) user_thresh = cache->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) user_thresh = div_factor_fine(cache->length, bargs->usage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) if (chunk_used < user_thresh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) btrfs_put_block_group(cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) static int chunk_devid_filter(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) struct btrfs_chunk *chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) struct btrfs_stripe *stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) stripe = btrfs_stripe_nr(chunk, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) static u64 calc_data_stripes(u64 type, int num_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) const int index = btrfs_bg_flags_to_raid_index(type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) const int ncopies = btrfs_raid_array[index].ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) const int nparity = btrfs_raid_array[index].nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) if (nparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) return num_stripes - nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) return num_stripes / ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) /* [pstart, pend) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) static int chunk_drange_filter(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) struct btrfs_chunk *chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) struct btrfs_stripe *stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) u64 stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) u64 stripe_length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) u64 type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) int factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) type = btrfs_chunk_type(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) factor = calc_data_stripes(type, num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) stripe = btrfs_stripe_nr(chunk, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) stripe_offset = btrfs_stripe_offset(leaf, stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) stripe_length = btrfs_chunk_length(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) stripe_length = div_u64(stripe_length, factor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) if (stripe_offset < bargs->pend &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) stripe_offset + stripe_length > bargs->pstart)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) /* [vstart, vend) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) static int chunk_vrange_filter(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) struct btrfs_chunk *chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) u64 chunk_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) if (chunk_offset < bargs->vend &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) /* at least part of the chunk is inside this vrange */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) static int chunk_stripes_range_filter(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) struct btrfs_chunk *chunk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) if (bargs->stripes_min <= num_stripes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) && num_stripes <= bargs->stripes_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) static int chunk_soft_convert_filter(u64 chunk_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) struct btrfs_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) chunk_type = chunk_to_extended(chunk_type) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) BTRFS_EXTENDED_PROFILE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) if (bargs->target == chunk_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) static int should_balance_chunk(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) struct btrfs_chunk *chunk, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) struct btrfs_fs_info *fs_info = leaf->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) struct btrfs_balance_control *bctl = fs_info->balance_ctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) struct btrfs_balance_args *bargs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) u64 chunk_type = btrfs_chunk_type(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) /* type filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) bargs = &bctl->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) bargs = &bctl->sys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) bargs = &bctl->meta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) /* profiles filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) chunk_profiles_filter(chunk_type, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) /* usage filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) chunk_usage_filter(fs_info, chunk_offset, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) /* devid filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) chunk_devid_filter(leaf, chunk, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) /* drange filter, makes sense only with devid filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) chunk_drange_filter(leaf, chunk, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) /* vrange filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) /* stripes filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) chunk_stripes_range_filter(leaf, chunk, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) /* soft profile changing mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) chunk_soft_convert_filter(chunk_type, bargs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) * limited by count, must be the last filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) if (bargs->limit == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) bargs->limit--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) * Same logic as the 'limit' filter; the minimum cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) * determined here because we do not have the global information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) * about the count of all chunks that satisfy the filters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) if (bargs->limit_max == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) bargs->limit_max--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) static int __btrfs_balance(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) struct btrfs_balance_control *bctl = fs_info->balance_ctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) struct btrfs_root *chunk_root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) u64 chunk_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) struct btrfs_path *path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) int enospc_errors = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) bool counting = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) /* The single value limit and min/max limits use the same bytes in the */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) u64 limit_data = bctl->data.limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) u64 limit_meta = bctl->meta.limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) u64 limit_sys = bctl->sys.limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) u32 count_data = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) u32 count_meta = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) u32 count_sys = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) int chunk_reserved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) if (!path) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) /* zero out stat counters */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) memset(&bctl->stat, 0, sizeof(bctl->stat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) if (!counting) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) * The single value limit and min/max limits use the same bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) * in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) bctl->data.limit = limit_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) bctl->meta.limit = limit_meta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) bctl->sys.limit = limit_sys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) key.type = BTRFS_CHUNK_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) atomic_read(&fs_info->balance_cancel_req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) mutex_lock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) * this shouldn't happen, it means the last relocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) * failed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) BUG(); /* FIXME break ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) ret = btrfs_previous_item(chunk_root, path, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) BTRFS_CHUNK_ITEM_KEY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) btrfs_item_key_to_cpu(leaf, &found_key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) if (found_key.objectid != key.objectid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) chunk_type = btrfs_chunk_type(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) if (!counting) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) bctl->stat.considered++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) ret = should_balance_chunk(leaf, chunk, found_key.offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) goto loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) if (counting) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) bctl->stat.expected++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) count_data++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) count_sys++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) count_meta++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) goto loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) * Apply limit_min filter, no need to check if the LIMITS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) * filter is used, limit_min is 0 by default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) count_data < bctl->data.limit_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) count_meta < bctl->meta.limit_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) count_sys < bctl->sys.limit_min)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) goto loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) if (!chunk_reserved) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) * We may be relocating the only data chunk we have,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) * which could potentially end up with losing data's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) * raid profile, so lets allocate an empty one in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) * advance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) ret = btrfs_may_alloc_data_chunk(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) found_key.offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) } else if (ret == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) chunk_reserved = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) ret = btrfs_relocate_chunk(fs_info, found_key.offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) if (ret == -ENOSPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) enospc_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) } else if (ret == -ETXTBSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) btrfs_info(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) "skipping relocation of block group %llu due to active swapfile",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) found_key.offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) } else if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) bctl->stat.completed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) if (found_key.offset == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) key.offset = found_key.offset - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) if (counting) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) counting = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) if (enospc_errors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) btrfs_info(fs_info, "%d enospc errors during balance",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) enospc_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) * alloc_profile_is_valid - see if a given profile is valid and reduced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) * @flags: profile to validate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) * @extended: if true @flags is treated as an extended profile
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) static int alloc_profile_is_valid(u64 flags, int extended)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) BTRFS_BLOCK_GROUP_PROFILE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) /* 1) check that all other bits are zeroed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) if (flags & ~mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) /* 2) see if profile is reduced */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) if (flags == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) return !extended; /* "0" is valid for usual profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) return has_single_bit_set(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) static inline int balance_need_close(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) /* cancel requested || normal exit path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) return atomic_read(&fs_info->balance_cancel_req) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) (atomic_read(&fs_info->balance_pause_req) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) atomic_read(&fs_info->balance_cancel_req) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) * Validate target profile against allowed profiles and return true if it's OK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) * Otherwise print the error message and return false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) const struct btrfs_balance_args *bargs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) u64 allowed, const char *type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) /* Profile is valid and does not have bits outside of the allowed set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) if (alloc_profile_is_valid(bargs->target, 1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) (bargs->target & ~allowed) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) btrfs_err(fs_info, "balance: invalid convert %s profile %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) type, btrfs_bg_type_to_raid_name(bargs->target));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) * Fill @buf with textual description of balance filter flags @bargs, up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) * @size_buf including the terminating null. The output may be trimmed if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) * does not fit into the provided buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) u32 size_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) u32 size_bp = size_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) char *bp = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) u64 flags = bargs->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) char tmp_buf[128] = {'\0'};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) if (!flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) #define CHECK_APPEND_NOARG(a) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) ret = snprintf(bp, size_bp, (a)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) if (ret < 0 || ret >= size_bp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) goto out_overflow; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) size_bp -= ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) bp += ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) #define CHECK_APPEND_1ARG(a, v1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) ret = snprintf(bp, size_bp, (a), (v1)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) if (ret < 0 || ret >= size_bp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) goto out_overflow; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) size_bp -= ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) bp += ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) #define CHECK_APPEND_2ARG(a, v1, v2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) ret = snprintf(bp, size_bp, (a), (v1), (v2)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) if (ret < 0 || ret >= size_bp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) goto out_overflow; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) size_bp -= ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) bp += ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) if (flags & BTRFS_BALANCE_ARGS_CONVERT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) CHECK_APPEND_1ARG("convert=%s,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) btrfs_bg_type_to_raid_name(bargs->target));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) if (flags & BTRFS_BALANCE_ARGS_SOFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) CHECK_APPEND_NOARG("soft,");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) btrfs_describe_block_groups(bargs->profiles, tmp_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) sizeof(tmp_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) if (flags & BTRFS_BALANCE_ARGS_USAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) CHECK_APPEND_2ARG("usage=%u..%u,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) bargs->usage_min, bargs->usage_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) if (flags & BTRFS_BALANCE_ARGS_DEVID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) if (flags & BTRFS_BALANCE_ARGS_DRANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) CHECK_APPEND_2ARG("drange=%llu..%llu,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) bargs->pstart, bargs->pend);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) if (flags & BTRFS_BALANCE_ARGS_VRANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) CHECK_APPEND_2ARG("vrange=%llu..%llu,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) bargs->vstart, bargs->vend);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) if (flags & BTRFS_BALANCE_ARGS_LIMIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) CHECK_APPEND_2ARG("limit=%u..%u,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) bargs->limit_min, bargs->limit_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) CHECK_APPEND_2ARG("stripes=%u..%u,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) bargs->stripes_min, bargs->stripes_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) #undef CHECK_APPEND_2ARG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) #undef CHECK_APPEND_1ARG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) #undef CHECK_APPEND_NOARG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) out_overflow:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) if (size_bp < size_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) buf[size_buf - size_bp - 1] = '\0'; /* remove last , */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) buf[0] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) u32 size_buf = 1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) char tmp_buf[192] = {'\0'};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) char *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) char *bp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) u32 size_bp = size_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) struct btrfs_balance_control *bctl = fs_info->balance_ctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) buf = kzalloc(size_buf, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) bp = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) #define CHECK_APPEND_1ARG(a, v1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) ret = snprintf(bp, size_bp, (a), (v1)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) if (ret < 0 || ret >= size_bp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) goto out_overflow; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) size_bp -= ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) bp += ret; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) if (bctl->flags & BTRFS_BALANCE_FORCE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) CHECK_APPEND_1ARG("%s", "-f ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) if (bctl->flags & BTRFS_BALANCE_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) CHECK_APPEND_1ARG("-d%s ", tmp_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) if (bctl->flags & BTRFS_BALANCE_METADATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) CHECK_APPEND_1ARG("-m%s ", tmp_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) CHECK_APPEND_1ARG("-s%s ", tmp_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) #undef CHECK_APPEND_1ARG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) out_overflow:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) if (size_bp < size_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) buf[size_buf - size_bp - 1] = '\0'; /* remove last " " */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) btrfs_info(fs_info, "balance: %s %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) (bctl->flags & BTRFS_BALANCE_RESUME) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) "resume" : "start", buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) * Should be called with balance mutexe held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) int btrfs_balance(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) struct btrfs_balance_control *bctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) struct btrfs_ioctl_balance_args *bargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) u64 meta_target, data_target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) u64 allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) int mixed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) u64 num_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) bool reducing_redundancy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) if (btrfs_fs_closing(fs_info) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) atomic_read(&fs_info->balance_pause_req) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) btrfs_should_cancel_balance(fs_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) allowed = btrfs_super_incompat_flags(fs_info->super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) mixed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) * In case of mixed groups both data and meta should be picked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) * and identical options should be given for both of them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) if (mixed && (bctl->flags & allowed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) !(bctl->flags & BTRFS_BALANCE_METADATA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) "balance: mixed groups data and metadata options must be the same");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) * rw_devices will not change at the moment, device add/delete/replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) * are exclusive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) num_devices = fs_info->fs_devices->rw_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) * SINGLE profile on-disk has no profile bit, but in-memory we have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) * special bit for it, to make it easier to distinguish. Thus we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) * to set it manually, or balance would refuse the profile.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) if (num_devices >= btrfs_raid_array[i].devs_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) allowed |= btrfs_raid_array[i].bg_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) !validate_convert_profile(fs_info, &bctl->sys, allowed, "system")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) * Allow to reduce metadata or system integrity only if force set for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) * profiles with redundancy (copies, parity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) allowed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) if (btrfs_raid_array[i].ncopies >= 2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) btrfs_raid_array[i].tolerated_failures >= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) allowed |= btrfs_raid_array[i].bg_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) seq = read_seqbegin(&fs_info->profiles_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) (fs_info->avail_system_alloc_bits & allowed) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) !(bctl->sys.target & allowed)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) (fs_info->avail_metadata_alloc_bits & allowed) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) !(bctl->meta.target & allowed)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) reducing_redundancy = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) reducing_redundancy = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) /* if we're not converting, the target field is uninitialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) bctl->meta.target : fs_info->avail_metadata_alloc_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) bctl->data.target : fs_info->avail_data_alloc_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) } while (read_seqretry(&fs_info->profiles_lock, seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) if (reducing_redundancy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) if (bctl->flags & BTRFS_BALANCE_FORCE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) btrfs_info(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) "balance: force reducing metadata redundancy");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) "balance: reduces metadata redundancy, use --force if you want this");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) "balance: metadata profile %s has lower redundancy than data profile %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) btrfs_bg_type_to_raid_name(meta_target),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) btrfs_bg_type_to_raid_name(data_target));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) if (fs_info->send_in_progress) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) btrfs_warn_rl(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) "cannot run balance while send operations are in progress (%d in progress)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) fs_info->send_in_progress);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) ret = insert_balance_item(fs_info, bctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) if (ret && ret != -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) BUG_ON(ret == -EEXIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) BUG_ON(fs_info->balance_ctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) fs_info->balance_ctl = bctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) BUG_ON(ret != -EEXIST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) update_balance_args(bctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) describe_balance_start_or_resume(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) ret = __btrfs_balance(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) btrfs_info(fs_info, "balance: paused");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) * Balance can be canceled by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) * - Regular cancel request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) * Then ret == -ECANCELED and balance_cancel_req > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) * - Fatal signal to "btrfs" process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) * Either the signal caught by wait_reserve_ticket() and callers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) * got -EINTR, or caught by btrfs_should_cancel_balance() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) * got -ECANCELED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) * Either way, in this case balance_cancel_req = 0, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) * ret == -EINTR or ret == -ECANCELED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) * So here we only check the return value to catch canceled balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) else if (ret == -ECANCELED || ret == -EINTR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) btrfs_info(fs_info, "balance: canceled");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) btrfs_info(fs_info, "balance: ended with status: %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) if (bargs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) memset(bargs, 0, sizeof(*bargs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) btrfs_update_ioctl_balance_args(fs_info, bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) balance_need_close(fs_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) reset_balance_state(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) btrfs_exclop_finish(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) wake_up(&fs_info->balance_wait_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) if (bctl->flags & BTRFS_BALANCE_RESUME)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) reset_balance_state(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) kfree(bctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) btrfs_exclop_finish(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) static int balance_kthread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) struct btrfs_fs_info *fs_info = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) if (fs_info->balance_ctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) if (!fs_info->balance_ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) btrfs_info(fs_info, "balance: resume skipped");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) * A ro->rw remount sequence should continue with the paused balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) * regardless of who pauses it, system or the user as of now, so set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) * the resume flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) return PTR_ERR_OR_ZERO(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) struct btrfs_balance_control *bctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) struct btrfs_balance_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) struct btrfs_disk_balance_args disk_bargs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) key.objectid = BTRFS_BALANCE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) key.type = BTRFS_TEMPORARY_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) if (ret > 0) { /* ret = -ENOENT; */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) if (!bctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) bctl->flags = btrfs_balance_flags(leaf, item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) bctl->flags |= BTRFS_BALANCE_RESUME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) btrfs_balance_data(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) btrfs_balance_meta(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) btrfs_balance_sys(leaf, item, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) * This should never happen, as the paused balance state is recovered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) * during mount without any chance of other exclusive ops to collide.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) * This gives the exclusive op status to balance and keeps in paused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) * state until user intervention (cancel or umount). If the ownership
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) * cannot be assigned, show a message but do not fail. The balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) * is in a paused state and must have fs_info::balance_ctl properly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) * set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) "balance: cannot set exclusive op status, resume manually");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) BUG_ON(fs_info->balance_ctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) spin_lock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) fs_info->balance_ctl = bctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) spin_unlock(&fs_info->balance_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) if (!fs_info->balance_ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) atomic_inc(&fs_info->balance_pause_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) wait_event(fs_info->balance_wait_q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) /* we are good with balance_ctl ripped off from under us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) atomic_dec(&fs_info->balance_pause_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) ret = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) if (!fs_info->balance_ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) * A paused balance with the item stored on disk can be resumed at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) * mount time if the mount is read-write. Otherwise it's still paused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) * and we must not allow cancelling as it deletes the item.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) if (sb_rdonly(fs_info->sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) atomic_inc(&fs_info->balance_cancel_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) * if we are running just wait and return, balance item is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) * deleted in btrfs_balance in this case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) wait_event(fs_info->balance_wait_q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) * Lock released to allow other waiters to continue, we'll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) * reexamine the status again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) mutex_lock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) if (fs_info->balance_ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) reset_balance_state(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) btrfs_exclop_finish(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) btrfs_info(fs_info, "balance: canceled");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) BUG_ON(fs_info->balance_ctl ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) atomic_dec(&fs_info->balance_cancel_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) mutex_unlock(&fs_info->balance_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) int btrfs_uuid_scan_kthread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) struct btrfs_fs_info *fs_info = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) struct btrfs_root *root = fs_info->tree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) struct btrfs_path *path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) struct extent_buffer *eb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) struct btrfs_root_item root_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) u32 item_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) struct btrfs_trans_handle *trans = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) bool closing = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) if (!path) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) key.objectid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) key.type = BTRFS_ROOT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) if (btrfs_fs_closing(fs_info)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) closing = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) ret = btrfs_search_forward(root, &key, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) BTRFS_OLDEST_GENERATION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) if (key.type != BTRFS_ROOT_ITEM_KEY ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) key.objectid != BTRFS_FS_TREE_OBJECTID) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) key.objectid > BTRFS_LAST_FREE_OBJECTID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) eb = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) item_size = btrfs_item_size_nr(eb, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) if (item_size < sizeof(root_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) read_extent_buffer(eb, &root_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) btrfs_item_ptr_offset(eb, slot),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) (int)sizeof(root_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) if (btrfs_root_refs(&root_item) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) if (!btrfs_is_empty_uuid(root_item.uuid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) !btrfs_is_empty_uuid(root_item.received_uuid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) if (trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) goto update_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) * 1 - subvol uuid item
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) * 1 - received_subvol uuid item
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) trans = btrfs_start_transaction(fs_info->uuid_root, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) update_tree:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) if (!btrfs_is_empty_uuid(root_item.uuid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) ret = btrfs_uuid_tree_add(trans, root_item.uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) BTRFS_UUID_KEY_SUBVOL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) key.objectid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) btrfs_warn(fs_info, "uuid_tree_add failed %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) ret = btrfs_uuid_tree_add(trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) root_item.received_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) BTRFS_UUID_KEY_RECEIVED_SUBVOL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) key.objectid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) btrfs_warn(fs_info, "uuid_tree_add failed %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) if (trans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) ret = btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) trans = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) if (key.offset < (u64)-1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) key.offset++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) key.type = BTRFS_ROOT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) } else if (key.objectid < (u64)-1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) key.type = BTRFS_ROOT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) key.objectid++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) if (trans && !IS_ERR(trans))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) else if (!closing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) up(&fs_info->uuid_tree_rescan_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) struct btrfs_root *tree_root = fs_info->tree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) struct btrfs_root *uuid_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) * 1 - root node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) * 1 - root item
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) trans = btrfs_start_transaction(tree_root, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) if (IS_ERR(trans))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) if (IS_ERR(uuid_root)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) ret = PTR_ERR(uuid_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) fs_info->uuid_root = uuid_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) down(&fs_info->uuid_tree_rescan_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) if (IS_ERR(task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) /* fs_info->update_uuid_tree_gen remains 0 in all error case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) btrfs_warn(fs_info, "failed to start uuid_scan task");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) up(&fs_info->uuid_tree_rescan_sem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) return PTR_ERR(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) * shrinking a device means finding all of the device extents past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) * the new size, and then following the back refs to the chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) * The chunk relocation code actually frees the device extent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) struct btrfs_fs_info *fs_info = device->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) struct btrfs_trans_handle *trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) struct btrfs_dev_extent *dev_extent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) u64 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) u64 chunk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) int failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) bool retried = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) struct extent_buffer *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) struct btrfs_super_block *super_copy = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) u64 old_total = btrfs_super_total_bytes(super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) u64 old_size = btrfs_device_get_total_bytes(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) u64 diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) new_size = round_down(new_size, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) start = new_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) diff = round_down(old_size - new_size, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) path->reada = READA_BACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) trans = btrfs_start_transaction(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) return PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) btrfs_device_set_total_bytes(device, new_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) device->fs_devices->total_rw_bytes -= diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) atomic64_sub(diff, &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) * Once the device's size has been set to the new size, ensure all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) * in-memory chunks are synced to disk so that the loop below sees them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) * and relocates them accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) if (contains_pending_extent(device, &start, diff)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) key.objectid = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) key.offset = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) mutex_lock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) ret = btrfs_previous_item(root, path, 0, key.type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) l = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) btrfs_item_key_to_cpu(l, &key, path->slots[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) if (key.objectid != device->devid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) length = btrfs_dev_extent_length(l, dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) if (key.offset + length <= new_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) * We may be relocating the only data chunk we have,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) * which could potentially end up with losing data's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) * raid profile, so lets allocate an empty one in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) * advance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) ret = btrfs_relocate_chunk(fs_info, chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) mutex_unlock(&fs_info->delete_unused_bgs_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) if (ret == -ENOSPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) } else if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) if (ret == -ETXTBSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) "could not shrink block group %llu due to active swapfile",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) chunk_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) } while (key.offset-- > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) if (failed && !retried) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) retried = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) } else if (failed && retried) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) /* Shrinking succeeded, else we would be at "done". */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) trans = btrfs_start_transaction(root, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) if (IS_ERR(trans)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) ret = PTR_ERR(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) /* Clear all state bits beyond the shrunk device size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) CHUNK_STATE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) btrfs_device_set_disk_total_bytes(device, new_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) if (list_empty(&device->post_commit_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) list_add_tail(&device->post_commit_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) &trans->transaction->dev_update_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) WARN_ON(diff > old_total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) btrfs_set_super_total_bytes(super_copy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) round_down(old_total - diff, fs_info->sectorsize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) /* Now btrfs_update_device() will change the on-disk size. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) ret = btrfs_update_device(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) btrfs_abort_transaction(trans, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) btrfs_end_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) ret = btrfs_commit_transaction(trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) btrfs_device_set_total_bytes(device, old_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) device->fs_devices->total_rw_bytes += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) atomic64_add(diff, &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) struct btrfs_key *key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) struct btrfs_chunk *chunk, int item_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) struct btrfs_super_block *super_copy = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) struct btrfs_disk_key disk_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) u32 array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) u8 *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) array_size = btrfs_super_sys_array_size(super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) if (array_size + item_size + sizeof(disk_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) return -EFBIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) ptr = super_copy->sys_chunk_array + array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) btrfs_cpu_key_to_disk(&disk_key, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) memcpy(ptr, &disk_key, sizeof(disk_key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) ptr += sizeof(disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) memcpy(ptr, chunk, item_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) item_size += sizeof(disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) * sort the devices in descending order by max_avail, total_avail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) static int btrfs_cmp_device_info(const void *a, const void *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) const struct btrfs_device_info *di_a = a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) const struct btrfs_device_info *di_b = b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) if (di_a->max_avail > di_b->max_avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) if (di_a->max_avail < di_b->max_avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) if (di_a->total_avail > di_b->total_avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) if (di_a->total_avail < di_b->total_avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) btrfs_set_fs_incompat(info, RAID56);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) btrfs_set_fs_incompat(info, RAID1C34);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) * Structure used internally for __btrfs_alloc_chunk() function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) * Wraps needed parameters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) struct alloc_chunk_ctl {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) u64 type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) /* Total number of stripes to allocate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) int num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) /* sub_stripes info for map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) int sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) /* Stripes per device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) int dev_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) /* Maximum number of devices to use */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) int devs_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) /* Minimum number of devices to use */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) int devs_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) /* ndevs has to be a multiple of this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) int devs_increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) /* Number of copies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) int ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) /* Number of stripes worth of bytes to store parity information */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) int nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) u64 max_stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) u64 max_chunk_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) u64 dev_extent_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) u64 stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) u64 chunk_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) int ndevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) static void init_alloc_chunk_ctl_policy_regular(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) struct alloc_chunk_ctl *ctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) u64 type = ctl->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) if (type & BTRFS_BLOCK_GROUP_DATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) ctl->max_stripe_size = SZ_1G;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) /* For larger filesystems, use larger metadata chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) ctl->max_stripe_size = SZ_1G;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) ctl->max_stripe_size = SZ_256M;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) ctl->max_chunk_size = ctl->max_stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) ctl->max_stripe_size = SZ_32M;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) ctl->max_chunk_size = 2 * ctl->max_stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) ctl->devs_max = min_t(int, ctl->devs_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) BTRFS_MAX_DEVS_SYS_CHUNK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) /* We don't want a chunk larger than 10% of writable space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) ctl->max_chunk_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) struct alloc_chunk_ctl *ctl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) int index = btrfs_bg_flags_to_raid_index(ctl->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) ctl->sub_stripes = btrfs_raid_array[index].sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) ctl->dev_stripes = btrfs_raid_array[index].dev_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) ctl->devs_max = btrfs_raid_array[index].devs_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) if (!ctl->devs_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) ctl->devs_min = btrfs_raid_array[index].devs_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) ctl->devs_increment = btrfs_raid_array[index].devs_increment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) ctl->ncopies = btrfs_raid_array[index].ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) ctl->nparity = btrfs_raid_array[index].nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) ctl->ndevs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) switch (fs_devices->chunk_alloc_policy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) case BTRFS_CHUNK_ALLOC_REGULAR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) static int gather_device_info(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) struct alloc_chunk_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) struct btrfs_device_info *devices_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) struct btrfs_fs_info *info = fs_devices->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) u64 total_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) int ndevs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) u64 max_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) u64 dev_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) * in the first pass through the devices list, we gather information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) * about the available holes on each device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) WARN(1, KERN_ERR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) "BTRFS: read-only device in alloc_list\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) &device->dev_state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) if (device->total_bytes > device->bytes_used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) total_avail = device->total_bytes - device->bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) total_avail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) /* If there is no space on this device, skip it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) if (total_avail < ctl->dev_extent_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) ret = find_free_dev_extent(device, dev_extent_want, &dev_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) &max_avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) if (ret && ret != -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) max_avail = dev_extent_want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) if (max_avail < ctl->dev_extent_min) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) if (btrfs_test_opt(info, ENOSPC_DEBUG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) btrfs_debug(info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) "%s: devid %llu has no free space, have=%llu want=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) __func__, device->devid, max_avail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) ctl->dev_extent_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) if (ndevs == fs_devices->rw_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) WARN(1, "%s: found more than %llu devices\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) __func__, fs_devices->rw_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) devices_info[ndevs].dev_offset = dev_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) devices_info[ndevs].max_avail = max_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) devices_info[ndevs].total_avail = total_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) devices_info[ndevs].dev = device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) ++ndevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) ctl->ndevs = ndevs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) * now sort the devices by hole size / available space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) btrfs_cmp_device_info, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) struct btrfs_device_info *devices_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) /* Number of stripes that count for block group size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) int data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) * The primary goal is to maximize the number of stripes, so use as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) * many devices as possible, even if the stripes are not maximum sized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) * The DUP profile stores more than one stripe per device, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) * max_avail is the total size so we have to adjust.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) ctl->dev_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) /* This will have to be fixed for RAID1 and RAID10 over more drives */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) * Use the number of data stripes to figure out how big this chunk is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) * really going to be in terms of logical address space, and compare
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) * that answer with the max chunk size. If it's higher, we try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) * reduce stripe_size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) * Reduce stripe_size, round it up to a 16MB boundary again and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) * then use it, unless it ends up being even bigger than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) * previous value we had already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) data_stripes), SZ_16M),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) ctl->stripe_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) /* Align to BTRFS_STRIPE_LEN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) ctl->chunk_size = ctl->stripe_size * data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) struct alloc_chunk_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) struct btrfs_device_info *devices_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) struct btrfs_fs_info *info = fs_devices->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) * Round down to number of usable stripes, devs_increment can be any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) * number so we can't use round_down() that requires power of 2, while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) * rounddown is safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) if (ctl->ndevs < ctl->devs_min) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) btrfs_debug(info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) "%s: not enough devices with free space: have=%d minimum required=%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) __func__, ctl->ndevs, ctl->devs_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) switch (fs_devices->chunk_alloc_policy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) case BTRFS_CHUNK_ALLOC_REGULAR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) return decide_stripe_size_regular(ctl, devices_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) static int create_chunk(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) struct alloc_chunk_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) struct btrfs_device_info *devices_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) struct btrfs_fs_info *info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) struct map_lookup *map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) struct extent_map_tree *em_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) u64 start = ctl->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) u64 type = ctl->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) map->num_stripes = ctl->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) for (i = 0; i < ctl->ndevs; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) for (j = 0; j < ctl->dev_stripes; ++j) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) int s = i * ctl->dev_stripes + j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) map->stripes[s].dev = devices_info[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) map->stripes[s].physical = devices_info[i].dev_offset +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) j * ctl->stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) map->stripe_len = BTRFS_STRIPE_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) map->io_align = BTRFS_STRIPE_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) map->io_width = BTRFS_STRIPE_LEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) map->type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) map->sub_stripes = ctl->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) em = alloc_extent_map();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) if (!em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) kfree(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) em->map_lookup = map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) em->start = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) em->len = ctl->chunk_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) em->block_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) em->block_len = em->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) em->orig_block_len = ctl->stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) em_tree = &info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) write_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) ret = add_extent_mapping(em_tree, em, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) write_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) write_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) ret = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) goto error_del_extent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) struct btrfs_device *dev = map->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) btrfs_device_set_bytes_used(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) dev->bytes_used + ctl->stripe_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) if (list_empty(&dev->post_commit_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) list_add_tail(&dev->post_commit_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) &trans->transaction->dev_update_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) atomic64_sub(ctl->stripe_size * map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) &info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) check_raid56_incompat_flag(info, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) check_raid1c34_incompat_flag(info, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) error_del_extent:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) write_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) remove_extent_mapping(em_tree, em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) write_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) /* One for our allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) /* One for the tree reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) struct btrfs_fs_info *info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) struct btrfs_fs_devices *fs_devices = info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) struct btrfs_device_info *devices_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) struct alloc_chunk_ctl ctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) lockdep_assert_held(&info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) if (!alloc_profile_is_valid(type, 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) if (list_empty(&fs_devices->alloc_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) if (btrfs_test_opt(info, ENOSPC_DEBUG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) btrfs_debug(info, "%s: no writable device", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) btrfs_err(info, "invalid chunk type 0x%llx requested", type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) ASSERT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) ctl.start = find_next_chunk(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) ctl.type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) init_alloc_chunk_ctl(fs_devices, &ctl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) if (!devices_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) ret = gather_device_info(fs_devices, &ctl, devices_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) ret = decide_stripe_size(fs_devices, &ctl, devices_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) ret = create_chunk(trans, &ctl, devices_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) kfree(devices_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) * Chunk allocation falls into two parts. The first part does work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) * that makes the new allocated chunk usable, but does not do any operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) * that modifies the chunk tree. The second part does the work that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) * requires modifying the chunk tree. This division is important for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) * bootstrap process of adding storage to a seed btrfs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) u64 chunk_offset, u64 chunk_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) struct btrfs_root *extent_root = fs_info->extent_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) struct btrfs_root *chunk_root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) struct btrfs_stripe *stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) size_t item_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) u64 dev_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) u64 stripe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) int i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) if (IS_ERR(em))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) return PTR_ERR(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) item_size = btrfs_chunk_item_size(map->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) stripe_size = em->orig_block_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) chunk = kzalloc(item_size, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) if (!chunk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) * Take the device list mutex to prevent races with the final phase of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) * a device replace operation that replaces the device object associated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) * with the map's stripes, because the device object's id can change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) * at any time during that final phase of the device replace operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) * (dev-replace.c:btrfs_dev_replace_finishing()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) mutex_lock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) device = map->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) dev_offset = map->stripes[i].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) ret = btrfs_update_device(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) dev_offset, stripe_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) stripe = &chunk->stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) device = map->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) dev_offset = map->stripes[i].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) btrfs_set_stack_stripe_devid(stripe, device->devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) btrfs_set_stack_stripe_offset(stripe, dev_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) stripe++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) mutex_unlock(&fs_info->fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) btrfs_set_stack_chunk_length(chunk, chunk_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) btrfs_set_stack_chunk_type(chunk, map->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) key.type = BTRFS_CHUNK_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) key.offset = chunk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) * TODO: Cleanup of inserted chunk root in case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) * failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) kfree(chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) u64 alloc_profile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) alloc_profile = btrfs_metadata_alloc_profile(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) ret = btrfs_alloc_chunk(trans, alloc_profile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) alloc_profile = btrfs_system_alloc_profile(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) ret = btrfs_alloc_chunk(trans, alloc_profile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) static inline int btrfs_chunk_max_errors(struct map_lookup *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) const int index = btrfs_bg_flags_to_raid_index(map->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) return btrfs_raid_array[index].tolerated_failures;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) int readonly = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) int miss_ndevs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) if (IS_ERR(em))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) if (test_bit(BTRFS_DEV_STATE_MISSING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) &map->stripes[i].dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) miss_ndevs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) &map->stripes[i].dev->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) readonly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) * If the number of missing devices is larger than max errors,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) * we can not write the data into that chunk successfully, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) * set it readonly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) if (miss_ndevs > btrfs_chunk_max_errors(map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) readonly = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) return readonly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) void btrfs_mapping_tree_free(struct extent_map_tree *tree)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) write_lock(&tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) em = lookup_extent_mapping(tree, 0, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) if (em)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) remove_extent_mapping(tree, em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) write_unlock(&tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) if (!em)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) /* once for us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) /* once for the tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) em = btrfs_get_chunk_map(fs_info, logical, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) if (IS_ERR(em))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) * We could return errors for these cases, but that could get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) * ugly and we'd probably do the same thing which is just not do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) * anything else and exit, so return 1 so the callers don't try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) * to use other copies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) ret = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) ret = map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) ret = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) * There could be two corrupted data stripes, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) * to loop retry in order to rebuild the correct data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) * Fail a stripe at a time on every retry except the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) * stripe under reconstruction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) ret = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) down_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) fs_info->dev_replace.tgtdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) ret++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) up_read(&fs_info->dev_replace.rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) u64 logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) unsigned long len = fs_info->sectorsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) em = btrfs_get_chunk_map(fs_info, logical, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) if (!WARN_ON(IS_ERR(em))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) len = map->stripe_len * nr_data_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) em = btrfs_get_chunk_map(fs_info, logical, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) if(!WARN_ON(IS_ERR(em))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) static int find_live_mirror(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) struct map_lookup *map, int first,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) int dev_replace_is_ongoing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) int num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) int preferred_mirror;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) int tolerance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) struct btrfs_device *srcdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) ASSERT((map->type &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) if (map->type & BTRFS_BLOCK_GROUP_RAID10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) num_stripes = map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) num_stripes = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) preferred_mirror = first + current->pid % num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) if (dev_replace_is_ongoing &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) fs_info->dev_replace.cont_reading_from_srcdev_mode ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) srcdev = fs_info->dev_replace.srcdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) srcdev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) * try to avoid the drive that is the source drive for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) * dev-replace procedure, only choose it if no other non-missing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) * mirror is available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) for (tolerance = 0; tolerance < 2; tolerance++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) if (map->stripes[preferred_mirror].dev->bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) (tolerance || map->stripes[preferred_mirror].dev != srcdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) return preferred_mirror;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) for (i = first; i < first + num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) if (map->stripes[i].dev->bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) (tolerance || map->stripes[i].dev != srcdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) /* we couldn't find one that doesn't fail. Just return something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) * and the io error handling code will clean up eventually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) return preferred_mirror;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) /* Bubble-sort the stripe set to put the parity/syndrome stripes last */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) int again = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) while (again) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) again = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) for (i = 0; i < num_stripes - 1; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) /* Swap if parity is on a smaller index */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) swap(bbio->stripes[i], bbio->stripes[i + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) again = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) struct btrfs_bio *bbio = kzalloc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) /* the size of the btrfs_bio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) sizeof(struct btrfs_bio) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) /* plus the variable array for the stripes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) sizeof(struct btrfs_bio_stripe) * (total_stripes) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) /* plus the variable array for the tgt dev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) sizeof(int) * (real_stripes) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) * plus the raid_map, which includes both the tgt dev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) * and the stripes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) sizeof(u64) * (total_stripes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) GFP_NOFS|__GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) atomic_set(&bbio->error, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) refcount_set(&bbio->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) return bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) void btrfs_get_bbio(struct btrfs_bio *bbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) WARN_ON(!refcount_read(&bbio->refs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) refcount_inc(&bbio->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) void btrfs_put_bbio(struct btrfs_bio *bbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) if (!bbio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) if (refcount_dec_and_test(&bbio->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) kfree(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) * Please note that, discard won't be sent to target device of device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) * replace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) u64 logical, u64 *length_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) struct btrfs_bio **bbio_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) struct btrfs_bio *bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) u64 length = *length_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) u64 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) u64 stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) u64 stripe_nr_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) u64 stripe_end_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) u64 stripe_cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) u64 stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) u64 stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) u64 num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) u32 stripe_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) u32 factor = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) u32 sub_stripes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) u64 stripes_per_dev = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) u32 remaining_stripes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) u32 last_stripe = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) /* discard always return a bbio */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) ASSERT(bbio_ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) em = btrfs_get_chunk_map(fs_info, logical, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) if (IS_ERR(em))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) return PTR_ERR(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) /* we don't discard raid56 yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) offset = logical - em->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) length = min_t(u64, em->start + em->len - logical, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) *length_ret = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) stripe_len = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) * stripe_nr counts the total number of stripes we have to stride
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) * to get to this block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) stripe_nr = div64_u64(offset, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) /* stripe_offset is the offset of this block in its stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) stripe_offset = offset - stripe_nr * stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) stripe_nr_end = round_up(offset + length, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) stripe_cnt = stripe_nr_end - stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) stripe_end_offset = stripe_nr_end * map->stripe_len -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) (offset + length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) * after this, stripe_nr is the number of stripes on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) * device we have to walk to find the data, and stripe_index is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) * the number of our device in the stripe array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) num_stripes = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) stripe_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) BTRFS_BLOCK_GROUP_RAID10)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) if (map->type & BTRFS_BLOCK_GROUP_RAID0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) sub_stripes = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) sub_stripes = map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) factor = map->num_stripes / sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) num_stripes = min_t(u64, map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) sub_stripes * stripe_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) stripe_index *= sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) stripes_per_dev = div_u64_rem(stripe_cnt, factor,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) &remaining_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) last_stripe *= sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) BTRFS_BLOCK_GROUP_DUP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) num_stripes = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) bbio = alloc_btrfs_bio(num_stripes, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) if (!bbio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) bbio->stripes[i].physical =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) map->stripes[stripe_index].physical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) stripe_offset + stripe_nr * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) bbio->stripes[i].dev = map->stripes[stripe_index].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) BTRFS_BLOCK_GROUP_RAID10)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) bbio->stripes[i].length = stripes_per_dev *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) if (i / sub_stripes < remaining_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) bbio->stripes[i].length +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) * Special for the first stripe and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) * the last stripe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) * |-------|...|-------|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) * |----------|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) * off end_off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) if (i < sub_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) bbio->stripes[i].length -=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) if (stripe_index >= last_stripe &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) stripe_index <= (last_stripe +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) sub_stripes - 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) bbio->stripes[i].length -=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) stripe_end_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) if (i == sub_stripes - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) stripe_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) bbio->stripes[i].length = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) stripe_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) if (stripe_index == map->num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) stripe_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) stripe_nr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) *bbio_ret = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) bbio->map_type = map->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) bbio->num_stripes = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) * In dev-replace case, for repair case (that's the only case where the mirror
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) * is selected explicitly when calling btrfs_map_block), blocks left of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) * left cursor can also be read from the target drive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) * array of stripes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) * For READ, it also needs to be supported using the same mirror number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) * If the requested block is not left of the left cursor, EIO is returned. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) * can happen because btrfs_num_copies() returns one more in the dev-replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) * case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) u64 logical, u64 length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) u64 srcdev_devid, int *mirror_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) u64 *physical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) int num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) int index_srcdev = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) int found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) u64 physical_of_found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) logical, &length, &bbio, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) ASSERT(bbio == NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) num_stripes = bbio->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) if (*mirror_num > num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) * that means that the requested area is not left of the left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) * cursor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) * process the rest of the function using the mirror_num of the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) * drive. Therefore look it up first. At the end, patch the device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) * pointer to the one of the target drive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) if (bbio->stripes[i].dev->devid != srcdev_devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) * In case of DUP, in order to keep it simple, only add the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) * mirror with the lowest physical address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) if (found &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) physical_of_found <= bbio->stripes[i].physical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) index_srcdev = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) found = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) physical_of_found = bbio->stripes[i].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) ASSERT(found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) if (!found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) *mirror_num = index_srcdev + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) *physical = physical_of_found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) static void handle_ops_on_dev_replace(enum btrfs_map_op op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) struct btrfs_bio **bbio_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) struct btrfs_dev_replace *dev_replace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) int *num_stripes_ret, int *max_errors_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) struct btrfs_bio *bbio = *bbio_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) u64 srcdev_devid = dev_replace->srcdev->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) int tgtdev_indexes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) int num_stripes = *num_stripes_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) int max_errors = *max_errors_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) if (op == BTRFS_MAP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) int index_where_to_add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) * duplicate the write operations while the dev replace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) * procedure is running. Since the copying of the old disk to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) * the new disk takes place at run time while the filesystem is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) * mounted writable, the regular write operations to the old
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) * disk have to be duplicated to go to the new disk as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) * Note that device->missing is handled by the caller, and that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) * the write to the old disk is already set up in the stripes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) * array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) index_where_to_add = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) if (bbio->stripes[i].dev->devid == srcdev_devid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) /* write to new disk, too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) struct btrfs_bio_stripe *new =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) bbio->stripes + index_where_to_add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) struct btrfs_bio_stripe *old =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) bbio->stripes + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) new->physical = old->physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) new->length = old->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) new->dev = dev_replace->tgtdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) bbio->tgtdev_map[i] = index_where_to_add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) index_where_to_add++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) max_errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) tgtdev_indexes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) num_stripes = index_where_to_add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) int index_srcdev = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) int found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) u64 physical_of_found = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) * During the dev-replace procedure, the target drive can also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) * be used to read data in case it is needed to repair a corrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) * block elsewhere. This is possible if the requested area is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) * left of the left cursor. In this area, the target drive is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) * full copy of the source drive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) if (bbio->stripes[i].dev->devid == srcdev_devid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) * In case of DUP, in order to keep it simple,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) * only add the mirror with the lowest physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) * address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) if (found &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) physical_of_found <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) bbio->stripes[i].physical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) index_srcdev = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) found = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) physical_of_found = bbio->stripes[i].physical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) if (found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) struct btrfs_bio_stripe *tgtdev_stripe =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) bbio->stripes + num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) tgtdev_stripe->physical = physical_of_found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) tgtdev_stripe->length =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) bbio->stripes[index_srcdev].length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) tgtdev_stripe->dev = dev_replace->tgtdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) bbio->tgtdev_map[index_srcdev] = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) tgtdev_indexes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) num_stripes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) *num_stripes_ret = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) *max_errors_ret = max_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) bbio->num_tgtdevs = tgtdev_indexes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) *bbio_ret = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) static bool need_full_stripe(enum btrfs_map_op op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) * btrfs_get_io_geometry - calculates the geomery of a particular (address, len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) * tuple. This information is used to calculate how big a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) * particular bio can get before it straddles a stripe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) * @fs_info - the filesystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) * @logical - address that we want to figure out the geometry of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) * @len - the length of IO we are going to perform, starting at @logical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) * @op - type of operation - write or read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) * @io_geom - pointer used to return values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) * Returns < 0 in case a chunk for the given logical address cannot be found,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) u64 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) u64 stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) u64 stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) u64 stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) u64 raid56_full_stripe_start = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) int data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) ASSERT(op != BTRFS_MAP_DISCARD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) em = btrfs_get_chunk_map(fs_info, logical, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) if (IS_ERR(em))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) return PTR_ERR(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) /* Offset of this logical address in the chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) offset = logical - em->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) /* Len of a stripe in a chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) stripe_len = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) /* Stripe wher this block falls in */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) stripe_nr = div64_u64(offset, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) /* Offset of stripe in the chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) stripe_offset = stripe_nr * stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) if (offset < stripe_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) btrfs_crit(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) stripe_offset, offset, em->start, logical, stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) /* stripe_offset is the offset of this block in its stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) stripe_offset = offset - stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) data_stripes = nr_data_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) u64 max_len = stripe_len - stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) * In case of raid56, we need to know the stripe aligned start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) unsigned long full_stripe_len = stripe_len * data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) raid56_full_stripe_start = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) * Allow a write of a full stripe, but make sure we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) * don't allow straddling of stripes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) full_stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) raid56_full_stripe_start *= full_stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) * For writes to RAID[56], allow a full stripeset across
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) * all disks. For other RAID types and for RAID[56]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) * reads, just allow a single stripe (on a single disk).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) if (op == BTRFS_MAP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) max_len = stripe_len * data_stripes -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) (offset - raid56_full_stripe_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) len = min_t(u64, em->len - offset, max_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) len = em->len - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) io_geom->len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) io_geom->offset = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) io_geom->stripe_len = stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) io_geom->stripe_nr = stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) io_geom->stripe_offset = stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) io_geom->raid56_stripe_offset = raid56_full_stripe_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) /* once for us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) enum btrfs_map_op op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) u64 logical, u64 *length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) struct btrfs_bio **bbio_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) int mirror_num, int need_raid_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) u64 stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) u64 stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) u64 stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) u32 stripe_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) int data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) int num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) int max_errors = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) int tgtdev_indexes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) int dev_replace_is_ongoing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) int num_alloc_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) int patch_the_first_stripe_for_dev_replace = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) u64 physical_to_patch_in_first_stripe = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) u64 raid56_full_stripe_start = (u64)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) struct btrfs_io_geometry geom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) ASSERT(bbio_ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) ASSERT(op != BTRFS_MAP_DISCARD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) em = btrfs_get_chunk_map(fs_info, logical, *length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) ASSERT(!IS_ERR(em));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) *length = geom.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) stripe_len = geom.stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) stripe_nr = geom.stripe_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) stripe_offset = geom.stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) raid56_full_stripe_start = geom.raid56_stripe_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) data_stripes = nr_data_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) down_read(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) * Hold the semaphore for read during the whole operation, write is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) * requested at commit time but must wait.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) if (!dev_replace_is_ongoing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) up_read(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) ret = get_extra_mirror_from_replace(fs_info, logical, *length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) dev_replace->srcdev->devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) &mirror_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) &physical_to_patch_in_first_stripe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) patch_the_first_stripe_for_dev_replace = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) } else if (mirror_num > map->num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) mirror_num = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) num_stripes = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) stripe_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) if (!need_full_stripe(op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) if (need_full_stripe(op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) num_stripes = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) else if (mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) stripe_index = mirror_num - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) stripe_index = find_live_mirror(fs_info, map, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) dev_replace_is_ongoing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) mirror_num = stripe_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) if (need_full_stripe(op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) num_stripes = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) } else if (mirror_num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) stripe_index = mirror_num - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) u32 factor = map->num_stripes / map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) stripe_index *= map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) if (need_full_stripe(op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) num_stripes = map->sub_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) else if (mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) stripe_index += mirror_num - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) int old_stripe_index = stripe_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) stripe_index = find_live_mirror(fs_info, map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) stripe_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) dev_replace_is_ongoing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) mirror_num = stripe_index - old_stripe_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) /* push stripe_nr back to the start of the full stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) stripe_nr = div64_u64(raid56_full_stripe_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) stripe_len * data_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) /* RAID[56] write or recovery. Return all stripes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) num_stripes = map->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) max_errors = nr_parity_stripes(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) *length = map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) stripe_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) stripe_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) * Mirror #0 or #1 means the original data block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) * Mirror #2 is RAID5 parity block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160) * Mirror #3 is RAID6 Q block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) stripe_nr = div_u64_rem(stripe_nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) data_stripes, &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) if (mirror_num > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) stripe_index = data_stripes + mirror_num - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) /* We distribute the parity blocks across stripes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) if (!need_full_stripe(op) && mirror_num <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) mirror_num = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) * after this, stripe_nr is the number of stripes on this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) * device we have to walk to find the data, and stripe_index is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) * the number of our device in the stripe array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) &stripe_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) mirror_num = stripe_index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) if (stripe_index >= map->num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) btrfs_crit(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) stripe_index, map->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) num_alloc_stripes = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) if (op == BTRFS_MAP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) num_alloc_stripes <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) if (op == BTRFS_MAP_GET_READ_MIRRORS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) num_alloc_stripes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) tgtdev_indexes = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) if (!bbio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) bbio->stripes[i].physical = map->stripes[stripe_index].physical +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) stripe_offset + stripe_nr * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) bbio->stripes[i].dev = map->stripes[stripe_index].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) stripe_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) /* build raid_map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) (need_full_stripe(op) || mirror_num > 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217) unsigned rot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) /* Work out the disk rotation on this stripe-set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) div_u64_rem(stripe_nr, num_stripes, &rot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) /* Fill in the logical address of each stripe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) tmp = stripe_nr * data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) for (i = 0; i < data_stripes; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) bbio->raid_map[(i+rot) % num_stripes] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) em->start + (tmp + i) * map->stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) if (map->type & BTRFS_BLOCK_GROUP_RAID6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) bbio->raid_map[(i+rot+1) % num_stripes] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) RAID6_Q_STRIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) sort_parity_stripes(bbio, num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) if (need_full_stripe(op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) max_errors = btrfs_chunk_max_errors(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) need_full_stripe(op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) &max_errors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) *bbio_ret = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) bbio->map_type = map->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) bbio->num_stripes = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) bbio->max_errors = max_errors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) bbio->mirror_num = mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) * this is the case that REQ_READ && dev_replace_is_ongoing &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) * mirror_num == num_stripes + 1 && dev_replace target drive is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) * available as a mirror
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) WARN_ON(num_stripes > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) bbio->stripes[0].dev = dev_replace->tgtdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) bbio->mirror_num = map->num_stripes + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) if (dev_replace_is_ongoing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) lockdep_assert_held(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) /* Unlock and let waiting writers proceed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) up_read(&dev_replace->rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) u64 logical, u64 *length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) struct btrfs_bio **bbio_ret, int mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) if (op == BTRFS_MAP_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) return __btrfs_map_block_for_discard(fs_info, logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) length, bbio_ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) mirror_num, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) /* For Scrub/replace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) u64 logical, u64 *length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) struct btrfs_bio **bbio_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) bio->bi_private = bbio->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) bio->bi_end_io = bbio->end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) bio_endio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) btrfs_put_bbio(bbio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) static void btrfs_end_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) struct btrfs_bio *bbio = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) int is_orig_bio = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) if (bio->bi_status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) atomic_inc(&bbio->error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) if (bio->bi_status == BLK_STS_IOERR ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) bio->bi_status == BLK_STS_TARGET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) struct btrfs_device *dev = btrfs_io_bio(bio)->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) ASSERT(dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) if (bio_op(bio) == REQ_OP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) BTRFS_DEV_STAT_WRITE_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) else if (!(bio->bi_opf & REQ_RAHEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) BTRFS_DEV_STAT_READ_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) if (bio->bi_opf & REQ_PREFLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) btrfs_dev_stat_inc_and_print(dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) BTRFS_DEV_STAT_FLUSH_ERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) if (bio == bbio->orig_bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) is_orig_bio = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328) btrfs_bio_counter_dec(bbio->fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) if (atomic_dec_and_test(&bbio->stripes_pending)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) if (!is_orig_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) bio = bbio->orig_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) /* only send an error to the higher layers if it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) * beyond the tolerance of the btrfs bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) if (atomic_read(&bbio->error) > bbio->max_errors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) * this bio is actually up to date, we didn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) * go over the max number of errors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) bio->bi_status = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) btrfs_end_bbio(bbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) } else if (!is_orig_bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) bio_put(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) u64 physical, struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) struct btrfs_fs_info *fs_info = bbio->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) bio->bi_private = bbio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) btrfs_io_bio(bio)->device = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) bio->bi_end_io = btrfs_end_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) bio->bi_iter.bi_sector = physical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) btrfs_debug_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) dev->devid, bio->bi_iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) bio_set_dev(bio, dev->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) btrfs_bio_counter_inc_noblocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) btrfsic_submit_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) atomic_inc(&bbio->error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) if (atomic_dec_and_test(&bbio->stripes_pending)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) /* Should be the original bio. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) WARN_ON(bio != bbio->orig_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) bio->bi_iter.bi_sector = logical >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) if (atomic_read(&bbio->error) > bbio->max_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) bio->bi_status = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) btrfs_end_bbio(bbio, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) int mirror_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) struct bio *first_bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) u64 logical = (u64)bio->bi_iter.bi_sector << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) u64 length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) u64 map_length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) int dev_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) int total_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) struct btrfs_bio *bbio = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) length = bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) map_length = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) btrfs_bio_counter_inc_blocked(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) &map_length, &bbio, mirror_num, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) return errno_to_blk_status(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) total_devs = bbio->num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) bbio->orig_bio = first_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) bbio->private = first_bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) bbio->end_io = first_bio->bi_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) bbio->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) atomic_set(&bbio->stripes_pending, bbio->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) /* In this case, map_length has been set to the length of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) a single stripe; not the whole write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) if (bio_op(bio) == REQ_OP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) ret = raid56_parity_write(fs_info, bio, bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) map_length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) ret = raid56_parity_recover(fs_info, bio, bbio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) map_length, mirror_num, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) return errno_to_blk_status(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) if (map_length < length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) btrfs_crit(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) "mapping failed logical %llu bio len %llu len %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) logical, length, map_length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) dev = bbio->stripes[dev_nr].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) &dev->dev_state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) (bio_op(first_bio) == REQ_OP_WRITE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) bbio_error(bbio, first_bio, logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) if (dev_nr < total_devs - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) bio = btrfs_bio_clone(first_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) bio = first_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) btrfs_bio_counter_dec(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) return BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) * Find a device specified by @devid or @uuid in the list of @fs_devices, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) * return NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) * If devid and uuid are both specified, the match must be exact, otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) * only devid is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) * If @seed is true, traverse through the seed devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) u64 devid, u8 *uuid, u8 *fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) bool seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) struct btrfs_fs_devices *seed_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) list_for_each_entry(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) if (device->devid == devid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) (!uuid || memcmp(device->uuid, uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) BTRFS_UUID_SIZE) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) if (!fsid ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) list_for_each_entry(device, &seed_devs->devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) if (device->devid == devid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) (!uuid || memcmp(device->uuid, uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) BTRFS_UUID_SIZE) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) u64 devid, u8 *dev_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) unsigned int nofs_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) * We call this under the chunk_mutex, so we want to use NOFS for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) * allocation, however we don't want to change btrfs_alloc_device() to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) * always do NOFS because we use it in a lot of other GFP_KERNEL safe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) * places.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) nofs_flag = memalloc_nofs_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) device = btrfs_alloc_device(NULL, &devid, dev_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) memalloc_nofs_restore(nofs_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) if (IS_ERR(device))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) list_add(&device->dev_list, &fs_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) device->fs_devices = fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) fs_devices->num_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) fs_devices->missing_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) return device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) * btrfs_alloc_device - allocate struct btrfs_device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) * @fs_info: used only for generating a new devid, can be NULL if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) * devid is provided (i.e. @devid != NULL).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) * @devid: a pointer to devid for this device. If NULL a new devid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) * is generated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) * @uuid: a pointer to UUID for this device. If NULL a new UUID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) * is generated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) * on error. Returned struct is not linked onto any lists and must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) * destroyed with btrfs_free_device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) const u64 *devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) const u8 *uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) if (WARN_ON(!devid && !fs_info))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) dev = __alloc_device(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) if (IS_ERR(dev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) return dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) if (devid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) tmp = *devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) ret = find_next_devid(fs_info, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) btrfs_free_device(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) dev->devid = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) if (uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) generate_random_uuid(dev->uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) return dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) u64 devid, u8 *uuid, bool error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) devid, uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) devid, uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) int index = btrfs_bg_flags_to_raid_index(type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) int ncopies = btrfs_raid_array[index].ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) const int nparity = btrfs_raid_array[index].nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) int data_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) if (nparity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) data_stripes = num_stripes - nparity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) data_stripes = num_stripes / ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) return div_u64(chunk_len, data_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) struct btrfs_chunk *chunk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) struct btrfs_fs_info *fs_info = leaf->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) struct extent_map_tree *map_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) u64 logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) u64 length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) u8 uuid[BTRFS_UUID_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) int num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) logical = key->offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) length = btrfs_chunk_length(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) * Only need to verify chunk item if we're reading from sys chunk array,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) * as chunk item in tree block is already verified by tree-checker.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) ret = btrfs_check_chunk_valid(leaf, chunk, logical);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) read_lock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) em = lookup_extent_mapping(map_tree, logical, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) read_unlock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) /* already mapped? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) if (em && em->start <= logical && em->start + em->len > logical) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) } else if (em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) em = alloc_extent_map();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) if (!em)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) if (!map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) em->map_lookup = map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) em->start = logical;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) em->len = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) em->orig_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) em->block_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) em->block_len = em->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) map->num_stripes = num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) map->io_width = btrfs_chunk_io_width(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) map->io_align = btrfs_chunk_io_align(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) map->type = btrfs_chunk_type(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) map->verified_stripes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) em->orig_block_len = calc_stripe_length(map->type, em->len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) map->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) for (i = 0; i < num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) map->stripes[i].physical =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) btrfs_stripe_offset_nr(leaf, chunk, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) devid = btrfs_stripe_devid_nr(leaf, chunk, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) read_extent_buffer(leaf, uuid, (unsigned long)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) btrfs_stripe_dev_uuid_nr(chunk, i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) devid, uuid, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) if (!map->stripes[i].dev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) !btrfs_test_opt(fs_info, DEGRADED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) btrfs_report_missing_device(fs_info, devid, uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) if (!map->stripes[i].dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) map->stripes[i].dev =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) add_missing_dev(fs_info->fs_devices, devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) if (IS_ERR(map->stripes[i].dev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) "failed to init missing dev %llu: %ld",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) devid, PTR_ERR(map->stripes[i].dev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) return PTR_ERR(map->stripes[i].dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) btrfs_report_missing_device(fs_info, devid, uuid, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) &(map->stripes[i].dev->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) write_lock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) ret = add_extent_mapping(map_tree, em, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) write_unlock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) "failed to add chunk map, start=%llu len=%llu: %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) em->start, em->len, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) static void fill_device_from_item(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) struct btrfs_dev_item *dev_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) unsigned long ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) device->devid = btrfs_device_id(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) device->total_bytes = device->disk_total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) device->commit_total_bytes = device->disk_total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735) device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) device->commit_bytes_used = device->bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) device->type = btrfs_device_type(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) device->io_align = btrfs_device_io_align(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) device->io_width = btrfs_device_io_width(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) device->sector_size = btrfs_device_sector_size(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) ptr = btrfs_device_uuid(dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) u8 *fsid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751) struct btrfs_fs_devices *fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) lockdep_assert_held(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) ASSERT(fsid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) /* This will match only for multi-device seed fs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) list_for_each_entry(fs_devices, &fs_info->fs_devices->seed_list, seed_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) fs_devices = find_fsid(fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) if (!fs_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) if (!btrfs_test_opt(fs_info, DEGRADED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) return ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) fs_devices = alloc_fs_devices(fsid, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) if (IS_ERR(fs_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) fs_devices->seeding = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) fs_devices->opened = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) * Upon first call for a seed fs fsid, just create a private copy of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779) * respective fs_devices and anchor it at fs_info->fs_devices->seed_list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) fs_devices = clone_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) if (IS_ERR(fs_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) if (!fs_devices->seeding) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) close_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) free_fs_devices(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) return fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) static int read_one_dev(struct extent_buffer *leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) struct btrfs_dev_item *dev_item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) struct btrfs_fs_info *fs_info = leaf->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) u8 fs_uuid[BTRFS_FSID_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) u8 dev_uuid[BTRFS_UUID_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) devid = btrfs_device_id(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) BTRFS_UUID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) BTRFS_FSID_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820) fs_devices = open_seed_devices(fs_info, fs_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) if (IS_ERR(fs_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) return PTR_ERR(fs_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) fs_uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) if (!device) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) if (!btrfs_test_opt(fs_info, DEGRADED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) btrfs_report_missing_device(fs_info, devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) dev_uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) device = add_missing_dev(fs_devices, devid, dev_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) if (IS_ERR(device)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) "failed to add missing dev %llu: %ld",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) devid, PTR_ERR(device));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) return PTR_ERR(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) if (!device->bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) if (!btrfs_test_opt(fs_info, DEGRADED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) btrfs_report_missing_device(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) devid, dev_uuid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) btrfs_report_missing_device(fs_info, devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) dev_uuid, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) if (!device->bdev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) * this happens when a device that was properly setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) * in the device info lists suddenly goes bad.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) * device->bdev is NULL, and so we have to set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) * device->missing to one here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) device->fs_devices->missing_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) /* Move the device to its own fs_devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) if (device->fs_devices != fs_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) list_move(&device->dev_list, &fs_devices->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) device->fs_devices->num_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) fs_devices->num_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) device->fs_devices->missing_devices--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) fs_devices->missing_devices++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) device->fs_devices = fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) if (device->fs_devices != fs_info->fs_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) if (device->generation !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) btrfs_device_generation(leaf, dev_item))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) fill_device_from_item(leaf, dev_item, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889) set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) device->fs_devices->total_rw_bytes += device->total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) atomic64_add(device->total_bytes - device->bytes_used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) &fs_info->free_chunk_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) struct btrfs_root *root = fs_info->tree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) struct btrfs_super_block *super_copy = fs_info->super_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) struct extent_buffer *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905) struct btrfs_disk_key *disk_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) u8 *array_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908) unsigned long sb_array_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) u32 num_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) u32 array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) u32 len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) u32 cur_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) u64 type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) * This will create extent buffer of nodesize, superblock size is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920) * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921) * overallocate but we can keep it as-is, only the first page is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923) sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) if (IS_ERR(sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) return PTR_ERR(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926) set_extent_buffer_uptodate(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) * The sb extent buffer is artificial and just used to read the system array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930) * set_extent_buffer_uptodate() call does not properly mark all it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) * pages up-to-date when the page is larger: extent does not cover the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932) * whole page and consequently check_page_uptodate does not find all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933) * the page's extents up-to-date (the hole beyond sb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934) * write_extent_buffer then triggers a WARN_ON.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936) * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937) * but sb spans only this function. Add an explicit SetPageUptodate call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) * to silence the warning eg. on PowerPC 64.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940) if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941) SetPageUptodate(sb->pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944) array_size = btrfs_super_sys_array_size(super_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) array_ptr = super_copy->sys_chunk_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947) sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) cur_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950) while (cur_offset < array_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951) disk_key = (struct btrfs_disk_key *)array_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) len = sizeof(*disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) if (cur_offset + len > array_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) goto out_short_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956) btrfs_disk_key_to_cpu(&key, disk_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) array_ptr += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959) sb_array_offset += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) cur_offset += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962) if (key.type != BTRFS_CHUNK_ITEM_KEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964) "unexpected item type %u in sys_array at offset %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) (u32)key.type, cur_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) chunk = (struct btrfs_chunk *)sb_array_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) * At least one btrfs_chunk with one stripe must be present,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) * exact stripe count check comes afterwards
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975) len = btrfs_chunk_item_size(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976) if (cur_offset + len > array_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) goto out_short_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979) num_stripes = btrfs_chunk_num_stripes(sb, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980) if (!num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982) "invalid number of stripes %u in sys_array at offset %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) num_stripes, cur_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) type = btrfs_chunk_type(sb, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989) if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991) "invalid chunk type %llu in sys_array at offset %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992) type, cur_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) len = btrfs_chunk_item_size(num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) if (cur_offset + len > array_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999) goto out_short_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) ret = read_one_chunk(&key, sb, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) array_ptr += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) sb_array_offset += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) cur_offset += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) clear_extent_buffer_uptodate(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010) free_extent_buffer_stale(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) out_short_read:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014) btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) len, cur_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) clear_extent_buffer_uptodate(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017) free_extent_buffer_stale(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022) * Check if all chunks in the fs are OK for read-write degraded mount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024) * If the @failing_dev is specified, it's accounted as missing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) * Return true if all chunks meet the minimal RW mount requirements.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027) * Return false if any chunk doesn't meet the minimal RW mount requirements.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029) bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) struct btrfs_device *failing_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) struct extent_map_tree *map_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034) u64 next_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035) bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) read_lock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038) em = lookup_extent_mapping(map_tree, 0, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039) read_unlock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) /* No chunk at all? Return false anyway */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) if (!em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7045) while (em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7046) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7047) int missing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7048) int max_tolerated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7049) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7051) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) max_tolerated =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) btrfs_get_num_tolerated_disk_barrier_failures(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054) map->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056) struct btrfs_device *dev = map->stripes[i].dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058) if (!dev || !dev->bdev ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059) test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060) dev->last_flush_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) missing++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062) else if (failing_dev && failing_dev == dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) missing++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065) if (missing > max_tolerated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) if (!failing_dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) btrfs_warn(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) "chunk %llu missing %d devices, max tolerance is %d for writable mount",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) em->start, missing, max_tolerated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071) ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) next_start = extent_map_end(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077) read_lock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078) em = lookup_extent_mapping(map_tree, next_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) (u64)(-1) - next_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080) read_unlock(&map_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086) static void readahead_tree_node_children(struct extent_buffer *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089) const int nr_items = btrfs_header_nritems(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) for (i = 0; i < nr_items; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092) u64 start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094) start = btrfs_node_blockptr(node, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) readahead_tree_block(node->fs_info, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099) int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) struct btrfs_root *root = fs_info->chunk_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) struct extent_buffer *leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105) struct btrfs_key found_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) int slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) u64 total_dev = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109) u64 last_ra_node = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116) * uuid_mutex is needed only if we are mounting a sprout FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117) * otherwise we don't need it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119) mutex_lock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) * It is possible for mount and umount to race in such a way that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123) * we execute this code path, but open_fs_devices failed to clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) * total_rw_bytes. We certainly want it cleared before reading the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125) * device items, so clear it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) fs_info->fs_devices->total_rw_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130) * Read all device items, and then all the chunk items. All
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131) * device items are found before any chunk item (their object id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132) * is smaller than the lowest possible object id for a chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133) * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7135) key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7136) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137) key.type = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142) struct extent_buffer *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146) if (slot >= btrfs_header_nritems(leaf)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) ret = btrfs_next_leaf(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) * The nodes on level 1 are not locked but we don't need to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) * that during mount time as nothing else can access the tree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) node = path->nodes[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159) if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160) if (last_ra_node != node->start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) readahead_tree_node_children(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) last_ra_node = node->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165) btrfs_item_key_to_cpu(leaf, &found_key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166) if (found_key.type == BTRFS_DEV_ITEM_KEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167) struct btrfs_dev_item *dev_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168) dev_item = btrfs_item_ptr(leaf, slot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) struct btrfs_dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) ret = read_one_dev(leaf, dev_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) total_dev++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174) } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) struct btrfs_chunk *chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) mutex_lock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) ret = read_one_chunk(&found_key, leaf, chunk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179) mutex_unlock(&fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) path->slots[0]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) * After loading chunk tree, we've got all device information,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) * do another round of validation checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190) if (total_dev != fs_info->fs_devices->total_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192) "super_num_devices %llu mismatch with num_devices %llu found here",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) btrfs_super_num_devices(fs_info->super_copy),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) total_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) if (btrfs_super_total_bytes(fs_info->super_copy) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7199) fs_info->fs_devices->total_rw_bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7202) btrfs_super_total_bytes(fs_info->super_copy),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) fs_info->fs_devices->total_rw_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) goto error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208) error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) mutex_unlock(&uuid_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215) void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7220) fs_devices->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7222) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7223) list_for_each_entry(device, &fs_devices->devices, dev_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7224) device->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7226) list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7227) list_for_each_entry(device, &seed_devs->devices, dev_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7228) device->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7230) seed_devs->fs_info = fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7232) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7235) static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7236) const struct btrfs_dev_stats_item *ptr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7237) int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7239) u64 val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7241) read_extent_buffer(eb, &val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7242) offsetof(struct btrfs_dev_stats_item, values) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7243) ((unsigned long)ptr) + (index * sizeof(u64)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7244) sizeof(val));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7245) return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7248) static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7249) struct btrfs_dev_stats_item *ptr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7250) int index, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7252) write_extent_buffer(eb, &val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7253) offsetof(struct btrfs_dev_stats_item, values) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7254) ((unsigned long)ptr) + (index * sizeof(u64)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7255) sizeof(val));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7258) static int btrfs_device_init_dev_stats(struct btrfs_device *device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7259) struct btrfs_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7261) struct btrfs_dev_stats_item *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7262) struct extent_buffer *eb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7263) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7264) int item_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7265) int i, ret, slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7267) key.objectid = BTRFS_DEV_STATS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7268) key.type = BTRFS_PERSISTENT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7269) key.offset = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7270) ret = btrfs_search_slot(NULL, device->fs_info->dev_root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7271) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7272) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7273) btrfs_dev_stat_set(device, i, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7274) device->dev_stats_valid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7275) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7276) return ret < 0 ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7278) slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7279) eb = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7280) item_size = btrfs_item_size_nr(eb, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7282) ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_stats_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7284) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7285) if (item_size >= (1 + i) * sizeof(__le64))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7286) btrfs_dev_stat_set(device, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7287) btrfs_dev_stats_value(eb, ptr, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7288) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7289) btrfs_dev_stat_set(device, i, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7292) device->dev_stats_valid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7293) btrfs_dev_stat_print_on_load(device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7294) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7296) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7299) int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7301) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7302) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7303) struct btrfs_path *path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7304) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7306) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7307) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7308) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7310) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7311) list_for_each_entry(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7312) ret = btrfs_device_init_dev_stats(device, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7313) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7314) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7316) list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7317) list_for_each_entry(device, &seed_devs->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7318) ret = btrfs_device_init_dev_stats(device, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7319) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7320) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7323) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7324) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7326) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7327) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7330) static int update_dev_stat_item(struct btrfs_trans_handle *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7331) struct btrfs_device *device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7332) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7333) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7334) struct btrfs_root *dev_root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7335) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7336) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7337) struct extent_buffer *eb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7338) struct btrfs_dev_stats_item *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7339) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7340) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7342) key.objectid = BTRFS_DEV_STATS_OBJECTID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7343) key.type = BTRFS_PERSISTENT_ITEM_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7344) key.offset = device->devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7346) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7347) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7348) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7349) ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7350) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7351) btrfs_warn_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7352) "error %d while searching for dev_stats item for device %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7353) ret, rcu_str_deref(device->name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7354) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7357) if (ret == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7358) btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7359) /* need to delete old one and insert a new one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7360) ret = btrfs_del_item(trans, dev_root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7361) if (ret != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7362) btrfs_warn_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7363) "delete too small dev_stats item for device %s failed %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7364) rcu_str_deref(device->name), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7365) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7367) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7370) if (ret == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7371) /* need to insert a new item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7372) btrfs_release_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7373) ret = btrfs_insert_empty_item(trans, dev_root, path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7374) &key, sizeof(*ptr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7375) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7376) btrfs_warn_in_rcu(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7377) "insert dev_stats item for device %s failed %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7378) rcu_str_deref(device->name), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7379) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7383) eb = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7384) ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7385) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7386) btrfs_set_dev_stats_value(eb, ptr, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7387) btrfs_dev_stat_read(device, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7388) btrfs_mark_buffer_dirty(eb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7390) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7391) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7392) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7395) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7396) * called from commit_transaction. Writes all changed device stats to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7398) int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7399) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7400) struct btrfs_fs_info *fs_info = trans->fs_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7401) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7402) struct btrfs_device *device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7403) int stats_cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7404) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7406) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7407) list_for_each_entry(device, &fs_devices->devices, dev_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7408) stats_cnt = atomic_read(&device->dev_stats_ccnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7409) if (!device->dev_stats_valid || stats_cnt == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7410) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7413) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7414) * There is a LOAD-LOAD control dependency between the value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7415) * dev_stats_ccnt and updating the on-disk values which requires
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7416) * reading the in-memory counters. Such control dependencies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7417) * require explicit read memory barriers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7418) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7419) * This memory barriers pairs with smp_mb__before_atomic in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7420) * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7421) * barrier implied by atomic_xchg in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7422) * btrfs_dev_stats_read_and_reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7423) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7424) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7426) ret = update_dev_stat_item(trans, device);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7427) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7428) atomic_sub(stats_cnt, &device->dev_stats_ccnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7430) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7432) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7434)
/*
 * Call btrfs_dev_stat_inc() for counter @index of @dev, then
 * btrfs_dev_stat_print_on_error() to log the device's error counters.
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7441) static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7443) if (!dev->dev_stats_valid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7444) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7445) btrfs_err_rl_in_rcu(dev->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7446) "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7447) rcu_str_deref(dev->name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7448) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7449) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7450) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7451) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7452) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7455) static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7457) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7459) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7460) if (btrfs_dev_stat_read(dev, i) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7461) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7462) if (i == BTRFS_DEV_STAT_VALUES_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7463) return; /* all values == 0, suppress message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7465) btrfs_info_in_rcu(dev->fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7466) "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7467) rcu_str_deref(dev->name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7468) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7469) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7470) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7471) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7472) btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7475) int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7476) struct btrfs_ioctl_get_dev_stats *stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7478) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7479) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7480) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7482) mutex_lock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7483) dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7484) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7485) mutex_unlock(&fs_devices->device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7487) if (!dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7488) btrfs_warn(fs_info, "get dev_stats failed, device not found");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7489) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7490) } else if (!dev->dev_stats_valid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7491) btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7492) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7493) } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7494) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7495) if (stats->nr_items > i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7496) stats->values[i] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7497) btrfs_dev_stat_read_and_reset(dev, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7498) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7499) btrfs_dev_stat_set(dev, i, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7501) btrfs_info(fs_info, "device stats zeroed by %s (%d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7502) current->comm, task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7503) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7504) for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7505) if (stats->nr_items > i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7506) stats->values[i] = btrfs_dev_stat_read(dev, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7508) if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7509) stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7510) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7513) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7514) * Update the size and bytes used for each device where it changed. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7515) * delayed since we would otherwise get errors while writing out the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7516) * superblocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7517) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7518) * Must be invoked during transaction commit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7519) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7520) void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7521) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7522) struct btrfs_device *curr, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7524) ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7526) if (list_empty(&trans->dev_update_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7527) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7530) * We don't need the device_list_mutex here. This list is owned by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7531) * transaction and the transaction must complete before the device is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7532) * released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7534) mutex_lock(&trans->fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7535) list_for_each_entry_safe(curr, next, &trans->dev_update_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7536) post_commit_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7537) list_del_init(&curr->post_commit_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7538) curr->commit_total_bytes = curr->disk_total_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7539) curr->commit_bytes_used = curr->bytes_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7541) mutex_unlock(&trans->fs_info->chunk_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7545) * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7547) int btrfs_bg_type_to_factor(u64 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7548) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7549) const int index = btrfs_bg_flags_to_raid_index(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7551) return btrfs_raid_array[index].ncopies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7556) static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7557) u64 chunk_offset, u64 devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7558) u64 physical_offset, u64 physical_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7560) struct extent_map_tree *em_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7561) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7562) struct map_lookup *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7563) struct btrfs_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7564) u64 stripe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7565) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7566) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7567) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7569) read_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7570) em = lookup_extent_mapping(em_tree, chunk_offset, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7571) read_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7573) if (!em) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7574) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7575) "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7576) physical_offset, devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7577) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7578) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7581) map = em->map_lookup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7582) stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7583) if (physical_len != stripe_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7584) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7585) "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7586) physical_offset, devid, em->start, physical_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7587) stripe_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7588) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7589) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7592) for (i = 0; i < map->num_stripes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7593) if (map->stripes[i].dev->devid == devid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7594) map->stripes[i].physical == physical_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7595) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7596) if (map->verified_stripes >= map->num_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7597) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7598) "too many dev extents for chunk %llu found",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7599) em->start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7600) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7601) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7603) map->verified_stripes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7604) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7607) if (!found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7608) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7609) "dev extent physical offset %llu devid %llu has no corresponding chunk",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7610) physical_offset, devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7611) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7614) /* Make sure no dev extent is beyond device bondary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7615) dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7616) if (!dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7617) btrfs_err(fs_info, "failed to find devid %llu", devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7618) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7619) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7622) /* It's possible this device is a dummy for seed device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7623) if (dev->disk_total_bytes == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7624) struct btrfs_fs_devices *devs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7626) devs = list_first_entry(&fs_info->fs_devices->seed_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7627) struct btrfs_fs_devices, seed_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7628) dev = btrfs_find_device(devs, devid, NULL, NULL, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7629) if (!dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7630) btrfs_err(fs_info, "failed to find seed devid %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7631) devid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7632) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7633) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7637) if (physical_offset + physical_len > dev->disk_total_bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7638) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7639) "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7640) devid, physical_offset, physical_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7641) dev->disk_total_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7642) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7643) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7645) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7646) free_extent_map(em);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7647) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7650) static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7652) struct extent_map_tree *em_tree = &fs_info->mapping_tree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7653) struct extent_map *em;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7654) struct rb_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7655) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7657) read_lock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7658) for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7659) em = rb_entry(node, struct extent_map, rb_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7660) if (em->map_lookup->num_stripes !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7661) em->map_lookup->verified_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7662) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7663) "chunk %llu has missing dev extent, have %d expect %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7664) em->start, em->map_lookup->verified_stripes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7665) em->map_lookup->num_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7666) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7667) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7670) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7671) read_unlock(&em_tree->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7672) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7675) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7676) * Ensure that all dev extents are mapped to correct chunk, otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7677) * later chunk allocation/free would cause unexpected behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7678) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7679) * NOTE: This will iterate through the whole device tree, which should be of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7680) * the same size level as the chunk tree. This slightly increases mount time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7681) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7682) int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7684) struct btrfs_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7685) struct btrfs_root *root = fs_info->dev_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7686) struct btrfs_key key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7687) u64 prev_devid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7688) u64 prev_dev_ext_end = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7689) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7691) key.objectid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7692) key.type = BTRFS_DEV_EXTENT_KEY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7693) key.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7695) path = btrfs_alloc_path();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7696) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7697) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7699) path->reada = READA_FORWARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7700) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7701) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7702) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7704) if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7705) ret = btrfs_next_item(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7706) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7707) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7708) /* No dev extents at all? Not good */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7709) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7710) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7711) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7714) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7715) struct extent_buffer *leaf = path->nodes[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7716) struct btrfs_dev_extent *dext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7717) int slot = path->slots[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7718) u64 chunk_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7719) u64 physical_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7720) u64 physical_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7721) u64 devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7723) btrfs_item_key_to_cpu(leaf, &key, slot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7724) if (key.type != BTRFS_DEV_EXTENT_KEY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7725) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7726) devid = key.objectid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7727) physical_offset = key.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7729) dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7730) chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7731) physical_len = btrfs_dev_extent_length(leaf, dext);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7733) /* Check if this dev extent overlaps with the previous one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7734) if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7735) btrfs_err(fs_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7736) "dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7737) devid, physical_offset, prev_dev_ext_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7738) ret = -EUCLEAN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7739) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7742) ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7743) physical_offset, physical_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7744) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7745) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7746) prev_devid = devid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7747) prev_dev_ext_end = physical_offset + physical_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7749) ret = btrfs_next_item(root, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7750) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7751) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7752) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7753) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7754) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7758) /* Ensure all chunks have corresponding dev extents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7759) ret = verify_chunk_dev_extent_mapping(fs_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7760) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7761) btrfs_free_path(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7762) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7765) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7766) * Check whether the given block group or device is pinned by any inode being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7767) * used as a swapfile.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7768) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7769) bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7771) struct btrfs_swapfile_pin *sp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7772) struct rb_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7774) spin_lock(&fs_info->swapfile_pins_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7775) node = fs_info->swapfile_pins.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7776) while (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7777) sp = rb_entry(node, struct btrfs_swapfile_pin, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7778) if (ptr < sp->ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7779) node = node->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7780) else if (ptr > sp->ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7781) node = node->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7782) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7783) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7785) spin_unlock(&fs_info->swapfile_pins_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7786) return node != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7787) }