// SPDX-License-Identifier: GPL-2.0
/*
 * bcache setup/teardown code, and some metadata io - read a superblock and
 * figure out what to do with it.
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "request.h"
#include "writeback.h"
#include "features.h"

#include <linux/blkdev.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/sysfs.h>

unsigned int bch_cutoff_writeback;
unsigned int bch_cutoff_writeback_sync;

static const char bcache_magic[] = {
	0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
	0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81
};

static const char invalid_uuid[] = {
	0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78,
	0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
};

static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
bool bcache_is_reboot;
LIST_HEAD(bch_cache_sets);
static LIST_HEAD(uncached_devices);

static int bcache_major;
static DEFINE_IDA(bcache_device_idx);
static wait_queue_head_t unregister_wait;
struct workqueue_struct *bcache_wq;
struct workqueue_struct *bch_flush_wq;
struct workqueue_struct *bch_journal_wq;


#define BTREE_MAX_PAGES		(256 * 1024 / PAGE_SIZE)
/* Limit on the number of partitions on a single bcache device */
#define BCACHE_MINORS		128
/* Limit on the number of bcache devices in a single system */
#define BCACHE_DEVICE_IDX_MAX	((1U << MINORBITS)/BCACHE_MINORS)

/* Superblock */

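/*
 * Decode the bucket size from the on-disk superblock.  For superblocks at or
 * above BCACHE_SB_VERSION_CDEV_WITH_FEATURES the 16 bit field holds either a
 * power-of-two order (large_bucket feature) or the low 16 bits of a 32 bit
 * size whose high bits live in obso_bucket_size_hi (obsoleted encoding);
 * older superblocks store the bucket size (in sectors) directly.
 */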
static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s)
{
	unsigned int bucket_size = le16_to_cpu(s->bucket_size);

	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
		if (bch_has_feature_large_bucket(sb)) {
			unsigned int max, order;

			max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
			order = le16_to_cpu(s->bucket_size);
			/*
			 * The bcache tools ensure this overflow cannot
			 * happen; an error message here is enough.
			 */
			if (order > max)
				pr_err("Bucket size (1 << %u) overflows\n",
					order);
			bucket_size = 1 << order;
		} else if (bch_has_feature_obso_large_bucket(sb)) {
			bucket_size +=
				le16_to_cpu(s->obso_bucket_size_hi) << 16;
		}
	}

	return bucket_size;
}

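/*
 * Checks common to all cache device superblock versions: decode the bucket
 * and cache-set geometry fields, then sanity check them against each other
 * and against the capacity of the underlying block device.  Returns NULL on
 * success or a string describing the first failed check.
 */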
static const char *read_super_common(struct cache_sb *sb, struct block_device *bdev,
				     struct cache_sb_disk *s)
{
	const char *err;
	unsigned int i;

	sb->first_bucket = le16_to_cpu(s->first_bucket);
	sb->nbuckets = le64_to_cpu(s->nbuckets);
	sb->bucket_size = get_bucket_size(sb, s);

	sb->nr_in_set = le16_to_cpu(s->nr_in_set);
	sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);

	err = "Too many journal buckets";
	if (sb->keys > SB_JOURNAL_BUCKETS)
		goto err;

	err = "Too many buckets";
	if (sb->nbuckets > LONG_MAX)
		goto err;

	err = "Not enough buckets";
	if (sb->nbuckets < 1 << 7)
		goto err;

	err = "Bad block size (not power of 2)";
	if (!is_power_of_2(sb->block_size))
		goto err;

	err = "Bad block size (larger than page size)";
	if (sb->block_size > PAGE_SECTORS)
		goto err;

	err = "Bad bucket size (not power of 2)";
	if (!is_power_of_2(sb->bucket_size))
		goto err;

	err = "Bad bucket size (smaller than page size)";
	if (sb->bucket_size < PAGE_SECTORS)
		goto err;

	err = "Invalid superblock: device too small";
	if (get_capacity(bdev->bd_disk) <
	    sb->bucket_size * sb->nbuckets)
		goto err;

	err = "Bad UUID";
	if (bch_is_zero(sb->set_uuid, 16))
		goto err;

	err = "Bad cache device number in set";
	if (!sb->nr_in_set ||
	    sb->nr_in_set <= sb->nr_this_dev ||
	    sb->nr_in_set > MAX_CACHES_PER_SET)
		goto err;

	err = "Journal buckets not sequential";
	for (i = 0; i < sb->keys; i++)
		if (sb->d[i] != sb->first_bucket + i)
			goto err;

	err = "Too many journal buckets";
	if (sb->first_bucket + sb->keys > sb->nbuckets)
		goto err;

	err = "Invalid superblock: first bucket comes before end of super";
	if (sb->first_bucket * sb->bucket_size < 16)
		goto err;

	err = NULL;
err:
	return err;
}


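/*
 * Read the superblock at SB_OFFSET on @bdev and decode it into @sb,
 * converting fields from little endian.  Version specific fields
 * (data_offset, feature bits) are handled per sb->version before the common
 * checks run.  On success *res is pointed at the on-disk superblock and the
 * page reference is kept for the caller; on failure the page is released
 * here and an error string is returned.
 */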
static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
			      struct cache_sb_disk **res)
{
	const char *err;
	struct cache_sb_disk *s;
	struct page *page;
	unsigned int i;

	page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
				   SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
	if (IS_ERR(page))
		return "IO error";
	s = page_address(page) + offset_in_page(SB_OFFSET);

	sb->offset = le64_to_cpu(s->offset);
	sb->version = le64_to_cpu(s->version);

	memcpy(sb->magic, s->magic, 16);
	memcpy(sb->uuid, s->uuid, 16);
	memcpy(sb->set_uuid, s->set_uuid, 16);
	memcpy(sb->label, s->label, SB_LABEL_SIZE);

	sb->flags = le64_to_cpu(s->flags);
	sb->seq = le64_to_cpu(s->seq);
	sb->last_mount = le32_to_cpu(s->last_mount);
	sb->keys = le16_to_cpu(s->keys);

	for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
		sb->d[i] = le64_to_cpu(s->d[i]);

	pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
		 sb->version, sb->flags, sb->seq, sb->keys);

	err = "Not a bcache superblock (bad offset)";
	if (sb->offset != SB_SECTOR)
		goto err;

	err = "Not a bcache superblock (bad magic)";
	if (memcmp(sb->magic, bcache_magic, 16))
		goto err;

	err = "Bad checksum";
	if (s->csum != csum_set(s))
		goto err;

	err = "Bad UUID";
	if (bch_is_zero(sb->uuid, 16))
		goto err;

	sb->block_size = le16_to_cpu(s->block_size);

	err = "Superblock block size smaller than device block size";
	if (sb->block_size << 9 < bdev_logical_block_size(bdev))
		goto err;

	switch (sb->version) {
	case BCACHE_SB_VERSION_BDEV:
		sb->data_offset = BDEV_DATA_START_DEFAULT;
		break;
	case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
	case BCACHE_SB_VERSION_BDEV_WITH_FEATURES:
		sb->data_offset = le64_to_cpu(s->data_offset);

		err = "Bad data offset";
		if (sb->data_offset < BDEV_DATA_START_DEFAULT)
			goto err;

		break;
	case BCACHE_SB_VERSION_CDEV:
	case BCACHE_SB_VERSION_CDEV_WITH_UUID:
		err = read_super_common(sb, bdev, s);
		if (err)
			goto err;
		break;
	case BCACHE_SB_VERSION_CDEV_WITH_FEATURES:
		/*
		 * Feature bits are needed in read_super_common(),
		 * so convert them first.
		 */
		sb->feature_compat = le64_to_cpu(s->feature_compat);
		sb->feature_incompat = le64_to_cpu(s->feature_incompat);
		sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);

		/* Check for unknown feature bits */
		err = "Unsupported compatible feature found";
		if (bch_has_unknown_compat_features(sb))
			goto err;

		err = "Unsupported read-only compatible feature found";
		if (bch_has_unknown_ro_compat_features(sb))
			goto err;

		err = "Unsupported incompatible feature found";
		if (bch_has_unknown_incompat_features(sb))
			goto err;

		err = read_super_common(sb, bdev, s);
		if (err)
			goto err;
		break;
	default:
		err = "Unsupported superblock version";
		goto err;
	}

	sb->last_mount = (u32)ktime_get_real_seconds();
	*res = s;
	return NULL;
err:
	put_page(page);
	return err;
}

static void write_bdev_super_endio(struct bio *bio)
{
	struct cached_dev *dc = bio->bi_private;

	if (bio->bi_status)
		bch_count_backing_io_errors(dc, bio);

	closure_put(&dc->sb_write);
}

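/*
 * Encode the in-memory superblock back into its on-disk little endian
 * layout in @out and submit @bio as a REQ_SYNC|REQ_META write to SB_SECTOR.
 * The checksum is computed last, over the freshly encoded fields.
 */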
static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
			  struct bio *bio)
{
	unsigned int i;

	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
	bio->bi_iter.bi_sector = SB_SECTOR;
	__bio_add_page(bio, virt_to_page(out), SB_SIZE,
		       offset_in_page(out));

	out->offset = cpu_to_le64(sb->offset);

	memcpy(out->uuid, sb->uuid, 16);
	memcpy(out->set_uuid, sb->set_uuid, 16);
	memcpy(out->label, sb->label, SB_LABEL_SIZE);

	out->flags = cpu_to_le64(sb->flags);
	out->seq = cpu_to_le64(sb->seq);

	out->last_mount = cpu_to_le32(sb->last_mount);
	out->first_bucket = cpu_to_le16(sb->first_bucket);
	out->keys = cpu_to_le16(sb->keys);

	for (i = 0; i < sb->keys; i++)
		out->d[i] = cpu_to_le64(sb->d[i]);

	if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
		out->feature_compat = cpu_to_le64(sb->feature_compat);
		out->feature_incompat = cpu_to_le64(sb->feature_incompat);
		out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat);
	}

	out->version = cpu_to_le64(sb->version);
	out->csum = csum_set(out);

	pr_debug("ver %llu, flags %llu, seq %llu\n",
		 sb->version, sb->flags, sb->seq);

	submit_bio(bio);
}

static void bch_write_bdev_super_unlock(struct closure *cl)
{
	struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);

	up(&dc->sb_write_mutex);
}

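/*
 * Write the backing device's superblock.  sb_write_mutex serialises
 * writers; it is released from the closure destructor,
 * bch_write_bdev_super_unlock() above, once the write has completed.
 */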
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
{
	struct closure *cl = &dc->sb_write;
	struct bio *bio = &dc->sb_bio;

	down(&dc->sb_write_mutex);
	closure_init(cl, parent);

	bio_init(bio, dc->sb_bv, 1);
	bio_set_dev(bio, dc->bdev);
	bio->bi_end_io = write_bdev_super_endio;
	bio->bi_private = dc;

	closure_get(cl);
	/* I/O request sent to backing device */
	__write_super(&dc->sb, dc->sb_disk, bio);

	closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
}

static void write_super_endio(struct bio *bio)
{
	struct cache *ca = bio->bi_private;

	/* is_read = 0 */
	bch_count_io_errors(ca, bio->bi_status, 0,
			    "writing superblock");
	closure_put(&ca->set->sb_write);
}

static void bcache_write_super_unlock(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, sb_write);

	up(&c->sb_write_mutex);
}

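/*
 * Write the cache device's superblock, bumping the sequence number and
 * raising the on-disk version to at least CDEV_WITH_UUID.  Serialised by
 * c->sb_write_mutex, which is released from bcache_write_super_unlock().
 */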
void bcache_write_super(struct cache_set *c)
{
	struct closure *cl = &c->sb_write;
	struct cache *ca = c->cache;
	struct bio *bio = &ca->sb_bio;
	unsigned int version = BCACHE_SB_VERSION_CDEV_WITH_UUID;

	down(&c->sb_write_mutex);
	closure_init(cl, &c->cl);

	ca->sb.seq++;

	if (ca->sb.version < version)
		ca->sb.version = version;

	bio_init(bio, ca->sb_bv, 1);
	bio_set_dev(bio, ca->bdev);
	bio->bi_end_io = write_super_endio;
	bio->bi_private = ca;

	closure_get(cl);
	__write_super(&ca->sb, ca->sb_disk, bio);

	closure_return_with_destructor(cl, bcache_write_super_unlock);
}

/* UUID io */

static void uuid_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;
	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);

	cache_set_err_on(bio->bi_status, c, "accessing uuids");
	bch_bbio_free(bio, c);
	closure_put(cl);
}

static void uuid_io_unlock(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);

	up(&c->uuid_write_mutex);
}

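/*
 * Read or write the uuid_entry array (c->uuids) in the bucket(s) described
 * by @k.  A write is submitted to every pointer in the key so each copy is
 * updated; a read only needs the first pointer.  uuid_write_mutex serialises
 * callers and is released from uuid_io_unlock() when the I/O completes.
 */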
static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
		    struct bkey *k, struct closure *parent)
{
	struct closure *cl = &c->uuid_write;
	struct uuid_entry *u;
	unsigned int i;
	char buf[80];

	BUG_ON(!parent);
	down(&c->uuid_write_mutex);
	closure_init(cl, parent);

	for (i = 0; i < KEY_PTRS(k); i++) {
		struct bio *bio = bch_bbio_alloc(c);

		bio->bi_opf = REQ_SYNC | REQ_META | op_flags;
		bio->bi_iter.bi_size = KEY_SIZE(k) << 9;

		bio->bi_end_io = uuid_endio;
		bio->bi_private = cl;
		bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
		bch_bio_map(bio, c->uuids);

		bch_submit_bbio(bio, c, k, i);

		if (op != REQ_OP_WRITE)
			break;
	}

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);

	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
		if (!bch_is_zero(u->uuid, 16))
			pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
				 u - c->uuids, u->uuid, u->label,
				 u->first_reg, u->last_reg, u->invalidated);

	closure_return_with_destructor(cl, uuid_io_unlock);
}

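/*
 * Pick up the uuid bucket key from the journal and read the uuid array in.
 * Journal entries older than BCACHE_JSET_VERSION_UUIDv1 used the smaller
 * uuid_entry_v0 layout, which is expanded in place (highest slot first) into
 * the current struct uuid_entry.
 */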
static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
{
	struct bkey *k = &j->uuid_bucket;

	if (__bch_btree_ptr_invalid(c, k))
		return "bad uuid pointer";

	bkey_copy(&c->uuid_bucket, k);
	uuid_io(c, REQ_OP_READ, 0, k, cl);

	if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
		struct uuid_entry_v0 *u0 = (void *) c->uuids;
		struct uuid_entry *u1 = (void *) c->uuids;
		int i;

		closure_sync(cl);

		/*
		 * Since the new uuid entry is bigger than the old, we have to
		 * convert starting at the highest memory address and work down
		 * in order to do it in place
		 */

		for (i = c->nr_uuids - 1;
		     i >= 0;
		     --i) {
			memcpy(u1[i].uuid, u0[i].uuid, 16);
			memcpy(u1[i].label, u0[i].label, 32);

			u1[i].first_reg = u0[i].first_reg;
			u1[i].last_reg = u0[i].last_reg;
			u1[i].invalidated = u0[i].invalidated;

			u1[i].flags = 0;
			u1[i].sectors = 0;
		}
	}

	return NULL;
}

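/*
 * Allocate a fresh meta bucket, write the current uuid array into it and
 * remember its key in c->uuid_bucket.  Returns 0 on success, 1 if no bucket
 * could be allocated.  Must be called with bch_register_lock held.
 */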
static int __uuid_write(struct cache_set *c)
{
	BKEY_PADDED(key) k;
	struct closure cl;
	struct cache *ca = c->cache;
	unsigned int size;

	closure_init_stack(&cl);
	lockdep_assert_held(&bch_register_lock);

	if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true))
		return 1;

	size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS;
	SET_KEY_SIZE(&k.key, size);
	uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl);
	closure_sync(&cl);

	/* Only one bucket used for uuid write */
	atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written);

	bkey_copy(&c->uuid_bucket, &k.key);
	bkey_put(c, &k.key);
	return 0;
}

int bch_uuid_write(struct cache_set *c)
{
	int ret = __uuid_write(c);

	if (!ret)
		bch_journal_meta(c, NULL);

	return ret;
}

static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
{
	struct uuid_entry *u;

	for (u = c->uuids;
	     u < c->uuids + c->nr_uuids; u++)
		if (!memcmp(u->uuid, uuid, 16))
			return u;

	return NULL;
}

static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
	static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

	return uuid_find(c, zero_uuid);
}

/*
 * Bucket priorities/gens:
 *
 * For each bucket, we store on disk its
 *    8 bit gen
 *   16 bit priority
 *
 * See alloc.c for an explanation of the gen. The priority is used to implement
 * LRU (and in the future other) cache replacement policies; for most purposes
 * it's just an opaque integer.
 *
 * The gens and the priorities don't have a whole lot to do with each other, and
 * it's actually the gens that must be written out at specific times - it's no
 * big deal if the priorities don't get written; if we lose them we just reuse
 * buckets in suboptimal order.
 *
 * On disk they're stored in a packed array, in as many buckets as are required
 * to fit them all. The buckets we use to store them form a list; the journal
 * header points to the first bucket, the first bucket points to the second
 * bucket, et cetera.
 *
 * This code is used by the allocation code; periodically (whenever it runs out
 * of buckets to allocate from) the allocation code will invalidate some
 * buckets, but it can't use those buckets until their new gens are safely on
 * disk.
 */

static void prio_endio(struct bio *bio)
{
	struct cache *ca = bio->bi_private;

	cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
	bch_bbio_free(bio, ca->set);
	closure_put(&ca->prio);
}

static void prio_io(struct cache *ca, uint64_t bucket, int op,
		    unsigned long op_flags)
{
	struct closure *cl = &ca->prio;
	struct bio *bio = bch_bbio_alloc(ca->set);

	closure_init_stack(cl);

	bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
	bio_set_dev(bio, ca->bdev);
	bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb);

	bio->bi_end_io = prio_endio;
	bio->bi_private = ca;
	bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
	bch_bio_map(bio, ca->disk_buckets);

	closure_bio_submit(ca->set, bio, &ca->prio);
	closure_sync(cl);
}

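/*
 * Write out the prios/gens of every bucket: pack them into prio_set pages,
 * allocate a prio bucket for each page (last page first, so each one can
 * record the bucket holding the next), write them out, then journal the
 * update so the new chain becomes visible.  Returns 0 on success, or
 * -ENOMEM when @wait is false and too few free buckets are available.
 */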
int bch_prio_write(struct cache *ca, bool wait)
{
	int i;
	struct bucket *b;
	struct closure cl;

	pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
		 fifo_used(&ca->free[RESERVE_PRIO]),
		 fifo_used(&ca->free[RESERVE_NONE]),
		 fifo_used(&ca->free_inc));

	/*
	 * Pre-check if there are enough free buckets. In the non-blocking
	 * scenario it's better to fail early rather than starting to allocate
	 * buckets and do a cleanup later in case of failure.
	 */
	if (!wait) {
		size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
			       fifo_used(&ca->free[RESERVE_NONE]);
		if (prio_buckets(ca) > avail)
			return -ENOMEM;
	}

	closure_init_stack(&cl);

	lockdep_assert_held(&ca->set->bucket_lock);

	ca->disk_buckets->seq++;

	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
			&ca->meta_sectors_written);

	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
		long bucket;
		struct prio_set *p = ca->disk_buckets;
		struct bucket_disk *d = p->data;
		struct bucket_disk *end = d + prios_per_bucket(ca);

		for (b = ca->buckets + i * prios_per_bucket(ca);
		     b < ca->buckets + ca->sb.nbuckets && d < end;
		     b++, d++) {
			d->prio = cpu_to_le16(b->prio);
			d->gen = b->gen;
		}

		p->next_bucket = ca->prio_buckets[i + 1];
		p->magic = pset_magic(&ca->sb);
		p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8);

		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
		BUG_ON(bucket == -1);

		mutex_unlock(&ca->set->bucket_lock);
		prio_io(ca, bucket, REQ_OP_WRITE, 0);
		mutex_lock(&ca->set->bucket_lock);

		ca->prio_buckets[i] = bucket;
		atomic_dec_bug(&ca->buckets[bucket].pin);
	}

	mutex_unlock(&ca->set->bucket_lock);

	bch_journal_meta(ca->set, &cl);
	closure_sync(&cl);

	mutex_lock(&ca->set->bucket_lock);

	/*
	 * Don't want the old priorities to get garbage collected until after
	 * we've finished writing the new ones and they're journalled.
	 */
	for (i = 0; i < prio_buckets(ca); i++) {
		if (ca->prio_last_buckets[i])
			__bch_bucket_free(ca,
				&ca->buckets[ca->prio_last_buckets[i]]);

		ca->prio_last_buckets[i] = ca->prio_buckets[i];
	}
	return 0;
}

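/*
 * Read the prios/gens back in at registration time, walking the chain of
 * prio buckets starting from @bucket (each bucket records the next one in
 * next_bucket).  Every bucket's checksum and magic are verified before its
 * entries are applied; returns 0 on success or -EIO on a bad bucket.
 */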
static int prio_read(struct cache *ca, uint64_t bucket)
{
	struct prio_set *p = ca->disk_buckets;
	struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
	struct bucket *b;
	unsigned int bucket_nr = 0;
	int ret = -EIO;

	for (b = ca->buckets;
	     b < ca->buckets + ca->sb.nbuckets;
	     b++, d++) {
		if (d == end) {
			ca->prio_buckets[bucket_nr] = bucket;
			ca->prio_last_buckets[bucket_nr] = bucket;
			bucket_nr++;

			prio_io(ca, bucket, REQ_OP_READ, 0);

			if (p->csum !=
			    bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) {
				pr_warn("bad csum reading priorities\n");
				goto out;
			}

			if (p->magic != pset_magic(&ca->sb)) {
				pr_warn("bad magic reading priorities\n");
				goto out;
			}

			bucket = p->next_bucket;
			d = p->data;
		}

		b->prio = le16_to_cpu(d->prio);
		b->gen = b->last_gc = d->gen;
	}

	ret = 0;
out:
	return ret;
}

/* Bcache device */

static int open_dev(struct block_device *b, fmode_t mode)
{
	struct bcache_device *d = b->bd_disk->private_data;

	if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
		return -ENXIO;

	closure_get(&d->cl);
	return 0;
}

static void release_dev(struct gendisk *b, fmode_t mode)
{
	struct bcache_device *d = b->private_data;

	closure_put(&d->cl);
}

static int ioctl_dev(struct block_device *b, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct bcache_device *d = b->bd_disk->private_data;

	return d->ioctl(d, mode, cmd, arg);
}

static const struct block_device_operations bcache_cached_ops = {
	.submit_bio = cached_dev_submit_bio,
	.open = open_dev,
	.release = release_dev,
	.ioctl = ioctl_dev,
	.owner = THIS_MODULE,
};

static const struct block_device_operations bcache_flash_ops = {
	.submit_bio = flash_dev_submit_bio,
	.open = open_dev,
	.release = release_dev,
	.ioctl = ioctl_dev,
	.owner = THIS_MODULE,
};

void bcache_device_stop(struct bcache_device *d)
{
	if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
		/*
		 * closure_fn set to
		 * - cached device: cached_dev_flush()
		 * - flash dev: flash_dev_flush()
		 */
		closure_queue(&d->cl);
}

static void bcache_device_unlink(struct bcache_device *d)
{
	lockdep_assert_held(&bch_register_lock);

	if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
		struct cache *ca = d->c->cache;

		sysfs_remove_link(&d->c->kobj, d->name);
		sysfs_remove_link(&d->kobj, "cache");

		bd_unlink_disk_holder(ca->bdev, d->disk);
	}
}

static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
			       const char *name)
{
	struct cache *ca = c->cache;
	int ret;

	bd_link_disk_holder(ca->bdev, d->disk);

	snprintf(d->name, BCACHEDEVNAME_SIZE,
		 "%s%u", name, d->id);

	ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
	if (ret < 0)
		pr_err("Couldn't create device -> cache set symlink\n");

	ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
	if (ret < 0)
		pr_err("Couldn't create cache set -> device symlink\n");

	clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
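/*
 * Undo bcache_device_attach(): remove the sysfs links, clear the device's
 * slot in the cache set and drop the reference on the set's caching
 * closure. If a detach (rather than a full shutdown) was requested, the
 * device's uuid entry in the set's uuid table is invalidated and written
 * out first.
 */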
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) static void bcache_device_detach(struct bcache_device *d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) lockdep_assert_held(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) atomic_dec(&d->c->attached_dev_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) struct uuid_entry *u = d->c->uuids + d->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) SET_UUID_FLASH_ONLY(u, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) memcpy(u->uuid, invalid_uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) bch_uuid_write(d->c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) bcache_device_unlink(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) d->c->devices[d->id] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) closure_put(&d->c->caching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) d->c = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
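/*
 * Record that device @d occupies slot @id in cache set @c, and take a
 * reference on the set's caching closure so the set stays alive for as
 * long as the device is attached (dropped again in bcache_device_detach()).
 */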
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) unsigned int id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) d->id = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) d->c = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) c->devices[id] = d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) if (id >= c->devices_max_used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) c->devices_max_used = id + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) closure_get(&c->caching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
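/*
 * Each bcache device reserves BCACHE_MINORS minor numbers, so the index
 * allocated from bcache_device_idx and the gendisk's first_minor are
 * related by a simple multiplication.
 */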
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) static inline int first_minor_to_idx(int first_minor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
	return first_minor / BCACHE_MINORS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) static inline int idx_to_first_minor(int idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) {
	return idx * BCACHE_MINORS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) static void bcache_device_free(struct bcache_device *d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) struct gendisk *disk = d->disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) lockdep_assert_held(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (disk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) pr_info("%s stopped\n", disk->disk_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) pr_err("bcache device (NULL gendisk) stopped\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (d->c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) bcache_device_detach(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (disk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if (disk_added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) del_gendisk(disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) if (disk->queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) blk_cleanup_queue(disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) ida_simple_remove(&bcache_device_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) first_minor_to_idx(disk->first_minor));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (disk_added)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) put_disk(disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) bioset_exit(&d->bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) kvfree(d->full_dirty_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) kvfree(d->stripe_sectors_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) closure_debug_destroy(&d->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
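/*
 * Common initialization for cached devices and flash-only volumes: size
 * and allocate the dirty-stripe tracking arrays, allocate a device index
 * and a bio split pool, and set up the gendisk and its request queue
 * limits. @cached_bdev is only non-NULL for cached devices; it is used to
 * fall back to the backing device's logical block size when the
 * superblock block size exceeds PAGE_SIZE.
 */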
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) sector_t sectors, struct block_device *cached_bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) const struct block_device_operations *ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) const size_t max_stripes = min_t(size_t, INT_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) SIZE_MAX / sizeof(atomic_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) uint64_t n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (!d->stripe_size)
		d->stripe_size = 1U << 31;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) if (!n || n > max_stripes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) pr_err("nr_stripes too large or invalid: %llu (start sector beyond end of disk?)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) d->nr_stripes = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) n = d->nr_stripes * sizeof(atomic_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (!d->stripe_sectors_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (!d->full_dirty_stripes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) goto out_free_stripe_sectors_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) idx = ida_simple_get(&bcache_device_idx, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) if (idx < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) goto out_free_full_dirty_stripes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) goto out_ida_remove;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) d->disk = alloc_disk(BCACHE_MINORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) if (!d->disk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) goto out_bioset_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) set_capacity(d->disk, sectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) d->disk->major = bcache_major;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) d->disk->first_minor = idx_to_first_minor(idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) d->disk->fops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) d->disk->private_data = d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) q = blk_alloc_queue(NUMA_NO_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) d->disk->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) q->limits.max_hw_sectors = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) q->limits.max_sectors = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) q->limits.max_segment_size = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) q->limits.max_segments = BIO_MAX_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) blk_queue_max_discard_sectors(q, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) q->limits.discard_granularity = 512;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) q->limits.io_min = block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) q->limits.logical_block_size = block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) q->limits.physical_block_size = block_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) * This should only happen with BCACHE_SB_VERSION_BDEV.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) d->disk->disk_name, q->limits.logical_block_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) PAGE_SIZE, bdev_logical_block_size(cached_bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) /* This also adjusts physical block size/min io size if needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) blk_queue_write_cache(q, true, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) out_bioset_exit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) bioset_exit(&d->bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) out_ida_remove:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) ida_simple_remove(&bcache_device_idx, idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) out_free_full_dirty_stripes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) kvfree(d->full_dirty_stripes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) out_free_stripe_sectors_dirty:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) kvfree(d->stripe_sectors_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) /* Cached device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) static void calc_cached_dev_sectors(struct cache_set *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) uint64_t sectors = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) struct cached_dev *dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) list_for_each_entry(dc, &c->cached_devs, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) sectors += bdev_sectors(dc->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) c->cached_dev_sectors = sectors;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) #define BACKING_DEV_OFFLINE_TIMEOUT 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) static int cached_dev_status_update(void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) struct cached_dev *dc = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) /*
	 * If this kthread is being stopped from elsewhere, quit the loop
	 * directly. dc->io_disable might also be set via the sysfs
	 * interface, so check it here too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) while (!kthread_should_stop() && !dc->io_disable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) q = bdev_get_queue(dc->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (blk_queue_dying(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) dc->offline_seconds++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) dc->offline_seconds = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) pr_err("%s: device offline for %d seconds\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) dc->backing_dev_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) BACKING_DEV_OFFLINE_TIMEOUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) pr_err("%s: disable I/O request due to backing device offline\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) dc->disk.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) dc->io_disable = true;
			/* make the io_disable change visible to others before stopping the device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) bcache_device_stop(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) schedule_timeout_interruptible(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) wait_for_kthread_stop();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
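/*
 * Bring the bcache device for @dc online: add the gendisk, emit a uevent
 * carrying the cached device's UUID and label, create the sysfs links
 * between the block device and the bcache kobject, and start the
 * backing-device status monitor thread. Returns -EBUSY if the device is
 * already running and -EIO if I/O has been disabled on it.
 */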
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) int bch_cached_dev_run(struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) struct bcache_device *d = &dc->disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) char *env[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) "DRIVER=bcache",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (dc->io_disable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) pr_err("I/O disabled on cached dev %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) kfree(env[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) kfree(env[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (atomic_xchg(&dc->running, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) kfree(env[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) kfree(env[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) pr_info("cached dev %s is running already\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (!d->c &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) struct closure cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) closure_init_stack(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) bch_write_bdev_super(dc, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) closure_sync(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) add_disk(d->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) bd_link_disk_holder(dc->bdev, dc->disk.disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) /*
	 * These variables won't show up in the disk's uevent file; use
	 * "udevadm monitor -e" to observe them instead, since only
	 * class/kset properties are persistent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) kfree(env[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) kfree(env[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) sysfs_create_link(&disk_to_dev(d->disk)->kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) &d->kobj, "bcache")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) dc->status_update_thread = kthread_run(cached_dev_status_update,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) dc, "bcache_status_update");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) if (IS_ERR(dc->status_update_thread)) {
		pr_warn("failed to create bcache_status_update kthread, continuing to run without monitoring backing device status\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) /*
 * If BCACHE_DEV_RATE_DW_RUNNING is set, the routine of the delayed work
 * dc->writeback_rate_update is currently running. Wait until the routine
 * quits (BCACHE_DEV_RATE_DW_RUNNING is cleared), then cancel it. If
 * BCACHE_DEV_RATE_DW_RUNNING is still set after the timeout (roughly
 * WRITEBACK_RATE_UPDATE_SECS_MAX seconds), give up waiting and cancel it
 * anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) time_out--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) schedule_timeout_interruptible(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) } while (time_out > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (time_out == 0)
		pr_warn("give up waiting for dc->writeback_rate_update to quit\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) cancel_delayed_work_sync(&dc->writeback_rate_update);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
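/*
 * Deferred half of a detach, run from the @detach work item once the last
 * reference from cached_dev_put() is gone: stop writeback, clear the cache
 * set uuid from the backing superblock, move the device back onto the
 * uncached list, and drop the closure reference taken in
 * bch_cached_dev_detach().
 */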
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) static void cached_dev_detach_finish(struct work_struct *w)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) struct cached_dev *dc = container_of(w, struct cached_dev, detach);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct closure cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) closure_init_stack(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) BUG_ON(refcount_read(&dc->count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) cancel_writeback_rate_update_dwork(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) kthread_stop(dc->writeback_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) dc->writeback_thread = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) memset(&dc->sb.set_uuid, 0, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) bch_write_bdev_super(dc, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) closure_sync(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) calc_cached_dev_sectors(dc->disk.c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) bcache_device_detach(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) list_move(&dc->list, &uncached_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) pr_info("Caching disabled for %s\n", dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
	/* Drop the ref we took in bch_cached_dev_detach() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) closure_put(&dc->disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
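/*
 * Start detaching @dc from its cache set. The actual teardown happens
 * asynchronously in cached_dev_detach_finish() once writeback has drained
 * and the last cached_dev reference is dropped; this is typically
 * triggered by writing to the device's "detach" sysfs attribute.
 */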
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) void bch_cached_dev_detach(struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) lockdep_assert_held(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * Block the device from being closed and freed until we're finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * detaching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) closure_get(&dc->disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) bch_writeback_queue(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) cached_dev_put(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
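/*
 * Attach backing device @dc to cache set @c. If @set_uuid is non-NULL it
 * must match the set's uuid; otherwise the set uuid stored in the backing
 * superblock is used. A uuid table entry is found or allocated for the
 * device, writeback is started (held back by writeback_lock until setup
 * completes), and the device is finally run and linked into sysfs.
 * Typically invoked from register_bdev() at registration time or via the
 * device's "attach" sysfs attribute.
 */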
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) uint8_t *set_uuid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) struct uuid_entry *u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) struct cached_dev *exist_dc, *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) if (dc->disk.c) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) pr_err("Can't attach %s: already attached\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) pr_err("Can't attach %s: shutting down\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) if (dc->sb.block_size < c->cache->sb.block_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) /* Will die */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) pr_err("Couldn't attach %s: block size less than set's block size\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) /* Check whether already attached */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) pr_err("Tried to attach %s but duplicate UUID already attached\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) u = uuid_find(c, dc->sb.uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) if (u &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) memcpy(u->uuid, invalid_uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) u = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) if (!u) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) pr_err("Couldn't find uuid for %s in set\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) u = uuid_find_empty(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!u) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) pr_err("Not caching %s, no room for UUID\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) /*
	 * Removing the attach attribute here would deadlock, since we're
	 * called via sysfs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) * sysfs_remove_file(&dc->kobj, &sysfs_attach);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (bch_is_zero(u->uuid, 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) struct closure cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) closure_init_stack(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) memcpy(u->uuid, dc->sb.uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) memcpy(u->label, dc->sb.label, SB_LABEL_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) u->first_reg = u->last_reg = rtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) bch_uuid_write(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) memcpy(dc->sb.set_uuid, c->set_uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) bch_write_bdev_super(dc, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) closure_sync(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) u->last_reg = rtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) bch_uuid_write(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) bcache_device_attach(&dc->disk, c, u - c->uuids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) list_move(&dc->list, &c->cached_devs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) calc_cached_dev_sectors(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * dc->c must be set before dc->count != 0 - paired with the mb in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) * cached_dev_get()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) refcount_set(&dc->count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
	/* Spawn the writeback thread, but keep it blocked until attach finishes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) down_write(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) if (bch_cached_dev_writeback_start(dc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) up_write(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) pr_err("Couldn't start writeback facilities for %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) dc->disk.disk->disk_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) atomic_set(&dc->has_dirty, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) bch_writeback_queue(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) bch_sectors_dirty_init(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) ret = bch_cached_dev_run(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (ret && (ret != -EBUSY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) up_write(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * bch_register_lock is held, bcache_device_stop() is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * able to be directly called. The kthread and kworker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * created previously in bch_cached_dev_writeback_start()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) * have to be stopped manually here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) kthread_stop(dc->writeback_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) cancel_writeback_rate_update_dwork(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) pr_err("Couldn't run cached device %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) bcache_device_link(&dc->disk, c, "bdev");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) atomic_inc(&c->attached_dev_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
		pr_err("The obsolete large bucket layout is unsupported, setting the bcache device read-only\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) pr_err("Please update to the latest bcache-tools to create the cache device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) set_disk_ro(dc->disk.disk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) /* Allow the writeback thread to proceed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) up_write(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) pr_info("Caching %s as %s on set %pU\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) dc->backing_dev_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) dc->disk.disk->disk_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) dc->disk.c->set_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
/* Called when dc->disk.kobj is released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) void bch_cached_dev_release(struct kobject *kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) struct cached_dev *dc = container_of(kobj, struct cached_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) disk.kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) kfree(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
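/*
 * Final teardown for a cached device, run from its closure once
 * cached_dev_flush() has finished: stop the writeback and status threads,
 * free the generic bcache device, release the cached superblock page and
 * the backing block device, and drop the kobject reference that keeps
 * struct cached_dev alive (freed in bch_cached_dev_release()).
 */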
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) static void cached_dev_free(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) cancel_writeback_rate_update_dwork(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (!IS_ERR_OR_NULL(dc->writeback_thread))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) kthread_stop(dc->writeback_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) if (!IS_ERR_OR_NULL(dc->status_update_thread))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) kthread_stop(dc->status_update_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (atomic_read(&dc->running))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) bcache_device_free(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) list_del(&dc->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) if (dc->sb_disk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) put_page(virt_to_page(dc->sb_disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) if (!IS_ERR_OR_NULL(dc->bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) wake_up(&unregister_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) kobject_put(&dc->disk.kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static void cached_dev_flush(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) struct bcache_device *d = &dc->disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) bcache_device_unlink(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) bch_cache_accounting_destroy(&dc->accounting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) kobject_del(&d->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) continue_at(cl, cached_dev_free, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
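/*
 * One-time initialization of a struct cached_dev before it is registered:
 * set up closures, the kobject, the detach work item and the recent-I/O
 * tracking used for sequential detection (with a 4MB sequential cutoff by
 * default), inherit the stripe size from the backing queue's io_opt, and
 * initialize the generic bcache device plus the request and writeback
 * state.
 */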
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) struct io *io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) struct request_queue *q = bdev_get_queue(dc->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) __module_get(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) INIT_LIST_HEAD(&dc->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) closure_init(&dc->disk.cl, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) INIT_WORK(&dc->detach, cached_dev_detach_finish);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) sema_init(&dc->sb_write_mutex, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) INIT_LIST_HEAD(&dc->io_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) spin_lock_init(&dc->io_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) dc->sequential_cutoff = 4 << 20;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) for (io = dc->io; io < dc->io + RECENT_IO; io++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) list_add(&io->lru, &dc->io_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) dc->disk.stripe_size = q->limits.io_opt >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) if (dc->disk.stripe_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) dc->partial_stripes_expensive =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) q->limits.raid_partial_stripes_expensive;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) ret = bcache_device_init(&dc->disk, block_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) dc->bdev, &bcache_cached_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) blk_queue_io_opt(dc->disk.disk->queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) atomic_set(&dc->io_errors, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) dc->io_disable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) /* default to auto */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) bch_cached_dev_request_init(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) bch_cached_dev_writeback_init(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) /* Cached device - bcache superblock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) struct block_device *bdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) const char *err = "cannot allocate memory";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) struct cache_set *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) bdevname(bdev, dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) memcpy(&dc->sb, sb, sizeof(struct cache_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) dc->bdev = bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) dc->bdev->bd_holder = dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) dc->sb_disk = sb_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (cached_dev_init(dc, sb->block_size << 9))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) err = "error creating kobject";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) "bcache"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) pr_info("registered backing device %s\n", dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) list_add(&dc->list, &uncached_devices);
	/* attach to a matching cache set if one exists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) list_for_each_entry(c, &bch_cache_sets, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) bch_cached_dev_attach(dc, c, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) err = "failed to run cached device";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) ret = bch_cached_dev_run(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) pr_notice("error %s: %s\n", dc->backing_dev_name, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) bcache_device_stop(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) /* Flash only volumes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
/* Called when d->kobj is released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) void bch_flash_dev_release(struct kobject *kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) struct bcache_device *d = container_of(kobj, struct bcache_device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) kfree(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) static void flash_dev_free(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) struct bcache_device *d = container_of(cl, struct bcache_device, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) atomic_long_sub(bcache_dev_sectors_dirty(d),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) &d->c->flash_dev_dirty_sectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) bcache_device_free(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) kobject_put(&d->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) static void flash_dev_flush(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) struct bcache_device *d = container_of(cl, struct bcache_device, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) bcache_device_unlink(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) kobject_del(&d->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) continue_at(cl, flash_dev_free, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
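/*
 * Create and start the block device for one flash-only volume described by
 * uuid entry @u: allocate the struct bcache_device, attach it to the cache
 * set, add the gendisk and hook everything up in sysfs.
 */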
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) struct bcache_device *d = kzalloc(sizeof(struct bcache_device),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (!d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) closure_init(&d->cl, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) set_closure_fn(&d->cl, flash_dev_flush, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) kobject_init(&d->kobj, &bch_flash_dev_ktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) if (bcache_device_init(d, block_bytes(c->cache), u->sectors,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) NULL, &bcache_flash_ops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) bcache_device_attach(d, c, u - c->uuids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) bch_sectors_dirty_init(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) bch_flash_dev_request_init(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) add_disk(d->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) bcache_device_link(d, c, "volume");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
		pr_err("The obsolete large bucket layout is unsupported, setting the bcache device read-only\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) pr_err("Please update to the latest bcache-tools to create the cache device\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) set_disk_ro(d->disk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) kobject_put(&d->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) static int flash_devs_run(struct cache_set *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) struct uuid_entry *u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) for (u = c->uuids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) u < c->uuids + c->nr_uuids && !ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) u++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (UUID_FLASH_ONLY(u))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) ret = flash_dev_run(c, u);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605)
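/*
 * Create a new flash-only volume of @size bytes on cache set @c: allocate
 * a uuid entry, mark it flash-only, persist the uuid table and then run
 * the volume. Typically reached by writing a size to the cache set's
 * "flash_vol_create" sysfs attribute.
 */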
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) int bch_flash_dev_create(struct cache_set *c, uint64_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) struct uuid_entry *u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) if (test_bit(CACHE_SET_STOPPING, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) if (!test_bit(CACHE_SET_RUNNING, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) u = uuid_find_empty(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) if (!u) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) pr_err("Can't create volume, no room for UUID\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) get_random_bytes(u->uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) memset(u->label, 0, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) SET_UUID_FLASH_ONLY(u, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) u->sectors = size >> 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) bch_uuid_write(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) return flash_dev_run(c, u);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)
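/*
 * Called when a backing device has seen too many I/O errors: disable
 * further I/O and stop the bcache device. Returns true if the device
 * was actually stopped.
 */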
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) bool bch_cached_dev_error(struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) dc->io_disable = true;
/* make io_disable visible to other CPUs as early as possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) pr_err("stop %s: too many IO errors on backing device %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) dc->disk.disk->disk_name, dc->backing_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) bcache_device_stop(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) /* Cache set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) __printf(2, 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) struct va_format vaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) va_list args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) if (c->on_error != ON_ERROR_PANIC &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) test_bit(CACHE_SET_STOPPING, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) pr_info("CACHE_SET_IO_DISABLE already set\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) * XXX: we can be called from atomic context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) * acquire_console_sem();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) va_start(args, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) vaf.fmt = fmt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) vaf.va = &args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) pr_err("error on %pU: %pV, disabling caching\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) c->set_uuid, &vaf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) va_end(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) if (c->on_error == ON_ERROR_PANIC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) panic("panic forced after error\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) bch_cache_set_unregister(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) /* When c->kobj released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) void bch_cache_set_release(struct kobject *kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) struct cache_set *c = container_of(kobj, struct cache_set, kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) kfree(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695)
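/*
 * Final stage of cache set teardown: free the btree cache, journal,
 * uuids and mempools, drop the reference on the cache device and
 * release the cache set kobject.
 */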
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) static void cache_set_free(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) struct cache_set *c = container_of(cl, struct cache_set, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) struct cache *ca;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) debugfs_remove(c->debug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) bch_open_buckets_free(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) bch_btree_cache_free(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) bch_journal_free(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) bch_bset_sort_state_free(&c->sort);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) ca = c->cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) if (ca) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) ca->set = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) c->cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) kobject_put(&ca->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) if (c->moving_gc_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) destroy_workqueue(c->moving_gc_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) bioset_exit(&c->bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) mempool_exit(&c->fill_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) mempool_exit(&c->bio_meta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) mempool_exit(&c->search);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) kfree(c->devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) list_del(&c->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) pr_info("Cache set %pU unregistered\n", c->set_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) wake_up(&unregister_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) closure_debug_destroy(&c->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) kobject_put(&c->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736)
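/*
 * Second stage of cache set teardown: stop the gc and allocator
 * threads, write out dirty btree nodes (unless the set failed with
 * too many I/O errors) and flush the last journal entry.
 */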
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) static void cache_set_flush(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) struct cache_set *c = container_of(cl, struct cache_set, caching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) struct cache *ca = c->cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct btree *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) bch_cache_accounting_destroy(&c->accounting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) kobject_put(&c->internal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) kobject_del(&c->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (!IS_ERR_OR_NULL(c->gc_thread))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) kthread_stop(c->gc_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (!IS_ERR_OR_NULL(c->root))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) list_add(&c->root->list, &c->btree_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
/*
 * Avoid flushing cached nodes if the cache set is retiring
 * because too many I/O errors were detected.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) list_for_each_entry(b, &c->btree_cache, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) mutex_lock(&b->write_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) if (btree_node_dirty(b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) __bch_btree_node_write(b, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) mutex_unlock(&b->write_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) if (ca->alloc_thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) kthread_stop(ca->alloc_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (c->journal.cur) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) cancel_delayed_work_sync(&c->journal.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) /* flush last journal entry if needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) c->journal.work.work.func(&c->journal.work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) closure_return(cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)
/*
 * This function is only called when CACHE_SET_IO_DISABLE is set, which means
 * the cache set is unregistering due to too many I/O errors. In this
 * condition the bcache device might be stopped; whether it is depends on the
 * stop_when_cache_set_failed value and on whether the broken cache holds
 * dirty data:
 *
 * dc->stop_when_cache_set_failed    dc->has_dirty   stop bcache device
 *  BCH_CACHED_DEV_STOP_AUTO              0                NO
 *  BCH_CACHED_DEV_STOP_AUTO              1                YES
 *  BCH_CACHED_DEV_STOP_ALWAYS            0                YES
 *  BCH_CACHED_DEV_STOP_ALWAYS            1                YES
 *
 * The expected behavior is: if stop_when_cache_set_failed is configured to
 * "auto" via the sysfs interface, the bcache device is not stopped as long
 * as the backing device holds no dirty data from the broken cache device.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) static void conditional_stop_bcache_device(struct cache_set *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct bcache_device *d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) d->disk->disk_name, c->set_uuid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) bcache_device_stop(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) } else if (atomic_read(&dc->has_dirty)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) /*
* dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * and dc->has_dirty == 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) d->disk->disk_name);
/*
 * There might be a small time gap in which the cache set is already
 * released but the bcache device is not. During this gap, regular
 * I/O requests go directly to the backing device, since no cache
 * set is attached any more. In writeback mode with a dirty cache
 * this can leave inconsistent data on the backing device. Therefore,
 * before calling bcache_device_stop() because of a broken cache
 * device, dc->io_disable should be explicitly set to true.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) dc->io_disable = true;
/* make io_disable visible to other CPUs as early as possible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) bcache_device_stop(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) /*
* dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * and dc->has_dirty == 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) d->disk->disk_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833)
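/*
 * First stage of cache set teardown: detach or stop every bcache
 * device that uses this cache set, then continue with cache_set_flush().
 */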
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) static void __cache_set_unregister(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) struct cache_set *c = container_of(cl, struct cache_set, caching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) struct cached_dev *dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) struct bcache_device *d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) for (i = 0; i < c->devices_max_used; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) d = c->devices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) if (!d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) dc = container_of(d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) bch_cached_dev_detach(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) conditional_stop_bcache_device(c, d, dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) bcache_device_stop(d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) continue_at(cl, cache_set_flush, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
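/* Begin cache set teardown; CACHE_SET_STOPPING ensures it is queued only once. */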
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) void bch_cache_set_stop(struct cache_set *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) /* closure_fn set to __cache_set_unregister() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) closure_queue(&c->caching);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) void bch_cache_set_unregister(struct cache_set *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) set_bit(CACHE_SET_UNREGISTERING, &c->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) bch_cache_set_stop(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) #define alloc_meta_bucket_pages(gfp, sb) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(meta_bucket_pages(sb))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
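/*
 * Allocate and initialize a cache_set for the cache whose superblock
 * is @sb. Returns NULL on failure, after tearing down any partially
 * initialized state via bch_cache_set_unregister().
 */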
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) int iter_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) struct cache *ca = container_of(sb, struct cache, sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) if (!c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) __module_get(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) closure_init(&c->cl, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) set_closure_fn(&c->cl, cache_set_free, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) closure_init(&c->caching, &c->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) /* Maybe create continue_at_noreturn() and use it here? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) closure_set_stopped(&c->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) closure_put(&c->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) kobject_init(&c->kobj, &bch_cache_set_ktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) kobject_init(&c->internal, &bch_cache_set_internal_ktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) bch_cache_accounting_init(&c->accounting, &c->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) memcpy(c->set_uuid, sb->set_uuid, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) c->cache = ca;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) c->cache->set = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) c->bucket_bits = ilog2(sb->bucket_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) c->block_bits = ilog2(sb->block_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) c->devices_max_used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) atomic_set(&c->attached_dev_nr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) c->btree_pages = meta_bucket_pages(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) if (c->btree_pages > BTREE_MAX_PAGES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) c->btree_pages = max_t(int, c->btree_pages / 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) BTREE_MAX_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) sema_init(&c->sb_write_mutex, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) mutex_init(&c->bucket_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) init_waitqueue_head(&c->btree_cache_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) spin_lock_init(&c->btree_cannibalize_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) init_waitqueue_head(&c->bucket_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) init_waitqueue_head(&c->gc_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) sema_init(&c->uuid_write_mutex, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) spin_lock_init(&c->btree_gc_time.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) spin_lock_init(&c->btree_split_time.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) spin_lock_init(&c->btree_read_time.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) bch_moving_init_cache_set(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) INIT_LIST_HEAD(&c->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) INIT_LIST_HEAD(&c->cached_devs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) INIT_LIST_HEAD(&c->btree_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) INIT_LIST_HEAD(&c->btree_cache_freeable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) INIT_LIST_HEAD(&c->btree_cache_freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) INIT_LIST_HEAD(&c->data_buckets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) sizeof(struct btree_iter_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) if (!c->devices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (mempool_init_slab_pool(&c->search, 32, bch_search_cache))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) if (mempool_init_kmalloc_pool(&c->bio_meta, 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) sizeof(struct bbio) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) sizeof(struct bio_vec) * meta_bucket_pages(sb)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) if (!c->uuids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) if (!c->moving_gc_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) if (bch_journal_alloc(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) if (bch_btree_cache_alloc(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) if (bch_open_buckets_alloc(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) c->congested_read_threshold_us = 2000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) c->congested_write_threshold_us = 20000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) c->error_limit = DEFAULT_IO_ERROR_LIMIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) c->idle_max_writeback_rate_enabled = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) bch_cache_set_unregister(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
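/*
 * Bring a cache set online: either replay the existing journal and
 * check the btree (CACHE_SYNC set), or initialize a fresh btree root
 * and uuid bucket; then start the gc thread, attach waiting backing
 * devices and start flash-only volumes.
 */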
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) static int run_cache_set(struct cache_set *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) const char *err = "cannot allocate memory";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) struct cached_dev *dc, *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) struct cache *ca = c->cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) struct closure cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) LIST_HEAD(journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) struct journal_replay *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) closure_init_stack(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) c->nbuckets = ca->sb.nbuckets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) set_gc_sectors(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (CACHE_SYNC(&c->cache->sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) struct bkey *k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) struct jset *j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) err = "cannot allocate memory for journal";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) if (bch_journal_read(c, &journal))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
pr_debug("bch_journal_read() done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) err = "no journal entries found";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) if (list_empty(&journal))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) j = &list_entry(journal.prev, struct journal_replay, list)->j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) err = "IO error reading priorities";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) * If prio_read() fails it'll call cache_set_error and we'll
* tear everything down right away, but if we checked for the
* error sooner we could avoid the journal replay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) k = &j->btree_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) err = "bad btree root";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) if (__bch_btree_ptr_invalid(c, k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) err = "error reading btree root";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) c->root = bch_btree_node_get(c, NULL, k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) j->btree_level,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) true, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) if (IS_ERR_OR_NULL(c->root))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) list_del_init(&c->root->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) rw_unlock(true, c->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) err = uuid_read(c, j, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) err = "error in recovery";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) if (bch_btree_check(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) bch_journal_mark(c, &journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) bch_initial_gc_finish(c);
pr_debug("bch_btree_check() done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * bcache_journal_next() can't happen sooner, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) * btree_gc_finish() will give spurious errors about last_gc >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) * gc_gen - this is a hack but oh well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) bch_journal_next(&c->journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) err = "error starting allocator thread";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) if (bch_cache_allocator_start(ca))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) * First place it's safe to allocate: btree_check() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) * btree_gc_finish() have to run before we have buckets to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) * allocate, and bch_bucket_alloc_set() might cause a journal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) * entry to be written so bcache_journal_next() has to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) * first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) * If the uuids were in the old format we have to rewrite them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * before the next journal entry is written:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) if (j->version < BCACHE_JSET_VERSION_UUID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) __uuid_write(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) err = "bcache: replay journal failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) if (bch_journal_replay(c, &journal))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) unsigned int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) pr_notice("invalidating existing data\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 2, SB_JOURNAL_BUCKETS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) for (j = 0; j < ca->sb.keys; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) ca->sb.d[j] = ca->sb.first_bucket + j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) bch_initial_gc_finish(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) err = "error starting allocator thread";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) if (bch_cache_allocator_start(ca))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) mutex_lock(&c->bucket_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) bch_prio_write(ca, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) mutex_unlock(&c->bucket_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) err = "cannot allocate new UUID bucket";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) if (__uuid_write(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) err = "cannot allocate new btree root";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (IS_ERR_OR_NULL(c->root))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) mutex_lock(&c->root->write_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) bkey_copy_key(&c->root->key, &MAX_KEY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) bch_btree_node_write(c->root, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) mutex_unlock(&c->root->write_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) bch_btree_set_root(c->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) rw_unlock(true, c->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) * We don't want to write the first journal entry until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) * everything is set up - fortunately journal entries won't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) * written until the SET_CACHE_SYNC() here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) SET_CACHE_SYNC(&c->cache->sb, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) bch_journal_next(&c->journal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) bch_journal_meta(c, &cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) err = "error starting gc thread";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (bch_gc_thread_start(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) closure_sync(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) bcache_write_super(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) if (bch_has_feature_obso_large_bucket(&c->cache->sb))
pr_err("Detected obsolete large bucket layout, all attached bcache devices will be read-only\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) list_for_each_entry_safe(dc, t, &uncached_devices, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) bch_cached_dev_attach(dc, c, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) flash_devs_run(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) set_bit(CACHE_SET_RUNNING, &c->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) while (!list_empty(&journal)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) l = list_first_entry(&journal, struct journal_replay, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) list_del(&l->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) kfree(l);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) closure_sync(&cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) bch_cache_set_error(c, "%s", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168)
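/*
 * Add @ca to the cache set identified by its superblock uuid, creating
 * the cache set if it does not exist yet. Returns NULL on success or an
 * error string on failure.
 */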
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) static const char *register_cache_set(struct cache *ca)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) char buf[12];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) const char *err = "cannot allocate memory";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) struct cache_set *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) list_for_each_entry(c, &bch_cache_sets, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) if (c->cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) return "duplicate cache set member";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) c = bch_cache_set_alloc(&ca->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) if (!c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) err = "error creating kobject";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) kobject_add(&c->internal, &c->kobj, "internal"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) bch_debug_init_cache_set(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) list_add(&c->list, &bch_cache_sets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) sprintf(buf, "cache%i", ca->sb.nr_this_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) if (sysfs_create_link(&ca->kobj, &c->kobj, "set") ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) sysfs_create_link(&c->kobj, &ca->kobj, buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) kobject_get(&ca->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) ca->set = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) ca->set->cache = ca;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) err = "failed to run cache set";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) if (run_cache_set(c) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) bch_cache_set_unregister(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) /* Cache device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) /* When ca->kobj released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) void bch_cache_release(struct kobject *kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) struct cache *ca = container_of(kobj, struct cache, kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) if (ca->set) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) BUG_ON(ca->set->cache != ca);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) ca->set->cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) kfree(ca->prio_buckets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) vfree(ca->buckets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) free_heap(&ca->heap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) free_fifo(&ca->free_inc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) for (i = 0; i < RESERVE_NR; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) free_fifo(&ca->free[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (ca->sb_disk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) put_page(virt_to_page(ca->sb_disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (!IS_ERR_OR_NULL(ca->bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) kfree(ca);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
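/*
 * Allocate the in-memory structures of a cache device: the bucket
 * reserve fifos, the free_inc fifo, the bucket heap and the bucket,
 * prio and disk-bucket arrays.
 */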
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) static int cache_alloc(struct cache *ca)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) size_t free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) size_t btree_buckets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) struct bucket *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) const char *err = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) __module_get(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) kobject_init(&ca->kobj, &bch_cache_ktype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
/*
 * When ca->sb.njournal_buckets is not zero a journal exists, and
 * btree nodes may split during bch_journal_replay(), so buckets of
 * the RESERVE_BTREE type are needed. In the worst case all journal
 * buckets hold valid journal entries and every key needs to be
 * replayed, so the number of RESERVE_BTREE buckets should be at
 * least as large as the number of journal buckets.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) btree_buckets = ca->sb.njournal_buckets ?: 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) if (!free) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) ret = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) err = "ca->sb.nbuckets is too small";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) goto err_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) err = "ca->free[RESERVE_BTREE] alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) goto err_btree_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) err = "ca->free[RESERVE_PRIO] alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) goto err_prio_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) err = "ca->free[RESERVE_MOVINGGC] alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) goto err_movinggc_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) err = "ca->free[RESERVE_NONE] alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) goto err_none_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) err = "ca->free_inc alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) goto err_free_inc_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) err = "ca->heap alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) goto err_heap_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) ca->buckets = vzalloc(array_size(sizeof(struct bucket),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) ca->sb.nbuckets));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (!ca->buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) err = "ca->buckets alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) goto err_buckets_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) prio_buckets(ca), 2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) if (!ca->prio_buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) err = "ca->prio_buckets alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) goto err_prio_buckets_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) if (!ca->disk_buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) err = "ca->disk_buckets alloc failed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) goto err_disk_buckets_alloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) for_each_bucket(b, ca)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) atomic_set(&b->pin, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) err_disk_buckets_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) kfree(ca->prio_buckets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) err_prio_buckets_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) vfree(ca->buckets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) err_buckets_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) free_heap(&ca->heap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) err_heap_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) free_fifo(&ca->free_inc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) err_free_inc_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) free_fifo(&ca->free[RESERVE_NONE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) err_none_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) free_fifo(&ca->free[RESERVE_MOVINGGC]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) err_movinggc_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) free_fifo(&ca->free[RESERVE_PRIO]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) err_prio_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) free_fifo(&ca->free[RESERVE_BTREE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) err_btree_alloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) err_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) pr_notice("error %s: %s\n", ca->cache_dev_name, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
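/*
 * Register a cache device: take over the superblock and block device,
 * allocate the in-memory structures and add the device to a cache set.
 */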
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) struct block_device *bdev, struct cache *ca)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) const char *err = NULL; /* must be set for any error case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) bdevname(bdev, ca->cache_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) memcpy(&ca->sb, sb, sizeof(struct cache_sb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) ca->bdev = bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) ca->bdev->bd_holder = ca;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) ca->sb_disk = sb_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) if (blk_queue_discard(bdev_get_queue(bdev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) ca->discard = CACHE_DISCARD(&ca->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) ret = cache_alloc(ca);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (ret != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) * If we failed here, it means ca->kobj is not initialized yet,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) * kobject_put() won't be called and there is no chance to
* call blkdev_put() on bdev in bch_cache_release(). So we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) * explicitly call blkdev_put() here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) if (ret == -ENOMEM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) err = "cache_alloc(): -ENOMEM";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) else if (ret == -EPERM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) err = "cache_alloc(): cache device is too small";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) err = "cache_alloc(): unknown error";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) if (kobject_add(&ca->kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) &part_to_dev(bdev->bd_part)->kobj,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) "bcache")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) err = "error calling kobject_add";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) err = register_cache_set(ca);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) pr_info("registered cache device %s\n", ca->cache_dev_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) kobject_put(&ca->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) pr_notice("error %s: %s\n", ca->cache_dev_name, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) /* Global interfaces/init */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) const char *buffer, size_t size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) const char *buffer, size_t size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) kobj_attribute_write(register, register_bcache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) kobj_attribute_write(register_quiet, register_bcache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
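/*
 * Return true if bdev is already in use as a backing device, either
 * attached to a registered cache set or sitting on the uncached_devices
 * list. The caller is expected to hold bch_register_lock.
 */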
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) static bool bch_is_open_backing(struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct cache_set *c, *tc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) struct cached_dev *dc, *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) list_for_each_entry_safe(dc, t, &c->cached_devs, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (dc->bdev == bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) list_for_each_entry_safe(dc, t, &uncached_devices, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) if (dc->bdev == bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
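/*
 * Return true if bdev is already in use as the cache device of a
 * registered cache set.
 */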
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) static bool bch_is_open_cache(struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) struct cache_set *c, *tc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) struct cache *ca = c->cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) if (ca->bdev == bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
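/* True if bdev is already registered as a cache or backing device. */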
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) static bool bch_is_open(struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
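/*
 * Arguments handed off to the asynchronous registration workers: the
 * user-supplied device path, the in-memory and on-disk superblocks and
 * the opened block device. The worker frees args, path and sb itself;
 * sb_disk and bdev are either taken over by the registered device or
 * released on failure.
 */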
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) struct async_reg_args {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) struct delayed_work reg_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) struct cache_sb *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) struct cache_sb_disk *sb_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)
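/*
 * Delayed-work handler for asynchronous backing device registration.
 * Allocates a cached_dev and calls register_bdev() under
 * bch_register_lock; if the allocation fails, the superblock page and
 * the block device are released here. Drops the module reference taken
 * in register_bcache() when finished.
 */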
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void register_bdev_worker(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) {
bool fail = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) struct async_reg_args *args =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) container_of(work, struct async_reg_args, reg_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) struct cached_dev *dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) dc = kzalloc(sizeof(*dc), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) if (!dc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) fail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) put_page(virt_to_page(args->sb_disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) fail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) if (fail)
pr_info("error %s: failed to register backing device\n", args->path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) kfree(args->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) kfree(args->path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) kfree(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)
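/*
 * Delayed-work handler for asynchronous cache device registration.
 * Allocates a struct cache and calls register_cache(); if the
 * allocation fails, the superblock page and the block device are
 * released here. Drops the module reference taken in register_bcache()
 * when finished.
 */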
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) static void register_cache_worker(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) {
bool fail = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) struct async_reg_args *args =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) container_of(work, struct async_reg_args, reg_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) struct cache *ca;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) ca = kzalloc(sizeof(*ca), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) if (!ca) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) fail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) put_page(virt_to_page(args->sb_disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) /* blkdev_put() will be called in bch_cache_release() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) fail = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) if (fail)
pr_info("error %s: failed to register cache device\n", args->path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) kfree(args->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) kfree(args->path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) kfree(args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538)
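/*
 * Queue the registration on system_wq so that the write to the sysfs
 * register file returns without waiting for the (potentially slow)
 * registration itself to complete.
 */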
static void register_device_async(struct async_reg_args *args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) if (SB_IS_BDEV(args->sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545)
/* 10 jiffies is enough of a delay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) queue_delayed_work(system_wq, &args->reg_work, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)
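/*
 * sysfs write handler for /sys/fs/bcache/register and register_quiet.
 * Opens the device named in the buffer, reads its superblock and
 * registers it as either a backing device or a cache device, directly
 * or via a worker when CONFIG_BCACHE_ASYNC_REGISTRATION is enabled.
 * register_quiet differs only in that an already registered or busy
 * device is ignored silently.
 */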
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) const char *buffer, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) const char *err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) char *path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) struct cache_sb *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) struct cache_sb_disk *sb_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) struct block_device *bdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) bool async_registration = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) async_registration = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) err = "failed to reference bcache module";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) if (!try_module_get(THIS_MODULE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
/* Make sure we see the latest value of bcache_is_reboot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) err = "bcache is in reboot";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) if (bcache_is_reboot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) goto out_module_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) err = "cannot allocate memory";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) path = kstrndup(buffer, size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) goto out_module_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) if (!sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) goto out_free_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) err = "failed to open device";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) bdev = blkdev_get_by_path(strim(path),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) FMODE_READ|FMODE_WRITE|FMODE_EXCL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) if (IS_ERR(bdev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) if (bdev == ERR_PTR(-EBUSY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) bdev = lookup_bdev(strim(path));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) if (!IS_ERR(bdev) && bch_is_open(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) err = "device already registered";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) err = "device busy";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) if (!IS_ERR(bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) bdput(bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) if (attr == &ksysfs_register_quiet)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) goto out_free_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) err = "failed to set blocksize";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) if (set_blocksize(bdev, 4096))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) goto out_blkdev_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) err = read_super(sb, bdev, &sb_disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) goto out_blkdev_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) err = "failed to register device";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) if (async_registration) {
/* register asynchronously */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) struct async_reg_args *args =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) if (!args) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) err = "cannot allocate memory";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) goto out_put_sb_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) args->path = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) args->sb = sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) args->sb_disk = sb_disk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) args->bdev = bdev;
register_device_async(args);
/* Don't wait; return to user space immediately */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) goto async_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (SB_IS_BDEV(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) if (!dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) goto out_put_sb_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) ret = register_bdev(sb, sb_disk, bdev, dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) /* blkdev_put() will be called in cached_dev_free() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) goto out_free_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) if (!ca)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) goto out_put_sb_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) /* blkdev_put() will be called in bch_cache_release() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) if (register_cache(sb, sb_disk, bdev, ca) != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) goto out_free_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) kfree(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) kfree(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) async_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) out_put_sb_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) put_page(virt_to_page(sb_disk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) out_blkdev_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) out_free_sb:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) kfree(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) out_free_path:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) kfree(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) out_module_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) module_put(THIS_MODULE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) out:
pr_info("error %s: %s\n", path ? path : "", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684)
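/*
 * Helper for bch_pending_bdevs_cleanup(): a private list entry for a
 * backing device still waiting for its cache set.
 */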
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) struct pdev {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) struct cached_dev *dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)
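/*
 * sysfs write handler for /sys/fs/bcache/pendings_cleanup. Stops every
 * backing device on uncached_devices whose cache set UUID does not
 * match any registered cache set, i.e. devices waiting for a cache
 * that is not going to show up.
 */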
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) const char *buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) LIST_HEAD(pending_devs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) ssize_t ret = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) struct cached_dev *dc, *tdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) struct pdev *pdev, *tpdev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) struct cache_set *c, *tc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) list_for_each_entry_safe(dc, tdc, &uncached_devices, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) if (!pdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) pdev->dc = dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) list_add(&pdev->list, &pending_devs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) char *pdev_set_uuid = pdev->dc->sb.set_uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) char *set_uuid = c->set_uuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) list_del(&pdev->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) kfree(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) pr_info("delete pdev %p\n", pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) list_del(&pdev->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) bcache_device_stop(&pdev->dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) kfree(pdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733)
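/*
 * Reboot notifier: on shutdown, halt or power-off, reject further
 * registrations, stop all cache sets and backing devices, and wait up
 * to 10 seconds for them to finish closing before the reboot proceeds.
 */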
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) if (bcache_is_reboot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) if (code == SYS_DOWN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) code == SYS_HALT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) code == SYS_POWER_OFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) unsigned long start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) bool stopped = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) struct cache_set *c, *tc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) struct cached_dev *dc, *tdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) if (bcache_is_reboot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753)
/* New registrations are rejected from now on */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) bcache_is_reboot = true;
/*
 * Make sure a registering caller (if any) on another CPU core
 * observes that bcache_is_reboot was set to true above.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) if (list_empty(&bch_cache_sets) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) list_empty(&uncached_devices))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) pr_info("Stopping all devices:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769)
/*
 * bch_register_lock is not held while calling bch_cache_set_stop()
 * and bcache_device_stop(), to avoid a potential deadlock during
 * reboot: the cache set and bcache device stopping paths acquire
 * bch_register_lock too.
 *
 * We are safe here because bcache_is_reboot is already true, so
 * register_bcache() will reject new registrations. bcache_is_reboot
 * also makes sure bcache_reboot() won't be re-entered by another
 * thread, so the following list iteration with
 * list_for_each_entry_safe() is race free.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) bch_cache_set_stop(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) bcache_device_stop(&dc->disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) * Give an early chance for other kthreads and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) * kworkers to stop themselves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) /* What's a condition variable? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) long timeout = start + 10 * HZ - jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) mutex_lock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) stopped = list_empty(&bch_cache_sets) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) list_empty(&uncached_devices);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) if (timeout < 0 || stopped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) prepare_to_wait(&unregister_wait, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) schedule_timeout(timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) finish_wait(&unregister_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) if (stopped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) pr_info("All devices stopped\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) pr_notice("Timeout waiting for devices to be closed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) static struct notifier_block reboot = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) .notifier_call = bcache_reboot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) .priority = INT_MAX, /* before any real devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832)
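/*
 * Module teardown. Also used as the error-unwind path of bcache_init(),
 * so every step checks whether its object was actually created.
 */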
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) static void bcache_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) bch_debug_exit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) bch_request_exit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) if (bcache_kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) kobject_put(bcache_kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) if (bcache_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) destroy_workqueue(bcache_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) if (bch_journal_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) destroy_workqueue(bch_journal_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) if (bch_flush_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) destroy_workqueue(bch_flush_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) bch_btree_exit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) if (bcache_major)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) unregister_blkdev(bcache_major, "bcache");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) unregister_reboot_notifier(&reboot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) mutex_destroy(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) /* Check and fixup module parameters */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) static void check_module_parameters(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) if (bch_cutoff_writeback_sync == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) if (bch_cutoff_writeback == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) bch_cutoff_writeback = CUTOFF_WRITEBACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) pr_warn("set bch_cutoff_writeback (%u) to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) bch_cutoff_writeback, bch_cutoff_writeback_sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) bch_cutoff_writeback = bch_cutoff_writeback_sync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878)
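/*
 * Module init: validate the writeback cutoff parameters, register the
 * reboot notifier and the bcache block major, create the work queues
 * and set up the /sys/fs/bcache kobject with its register files.
 */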
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) static int __init bcache_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) static const struct attribute *files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) &ksysfs_register.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) &ksysfs_register_quiet.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) &ksysfs_pendings_cleanup.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) NULL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) check_module_parameters();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) mutex_init(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) init_waitqueue_head(&unregister_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) register_reboot_notifier(&reboot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) bcache_major = register_blkdev(0, "bcache");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) if (bcache_major < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) unregister_reboot_notifier(&reboot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) mutex_destroy(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) return bcache_major;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) if (bch_btree_init())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) if (!bcache_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) *
* 1. It used `system_wq` before, which also does no memory reclaim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * reduced throughput can be observed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) *
* We still want to use our own queue so as not to congest the `system_wq`.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) if (!bch_flush_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) if (!bch_journal_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) bcache_kobj = kobject_create_and_add("bcache", fs_kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) if (!bcache_kobj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) if (bch_request_init() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) sysfs_create_files(bcache_kobj, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) bch_debug_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) closure_debug_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) bcache_is_reboot = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) bcache_exit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) * Module hooks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) module_exit(bcache_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) module_init(bcache_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) module_param(bch_cutoff_writeback, uint, 0);
MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cut off writeback");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) module_param(bch_cutoff_writeback_sync, uint, 0);
MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cut off writeback");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) MODULE_DESCRIPTION("Bcache: a Linux block layer cache");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) MODULE_LICENSE("GPL");