// SPDX-License-Identifier: GPL-2.0
/*
 * Main bcache entry point - handle a read or a write request and decide what to
 * do with it; the make_request functions are called by the block layer.
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include "writeback.h"

#include <linux/module.h>
#include <linux/hash.h>
#include <linux/random.h>
#include <linux/backing-dev.h>

#include <trace/events/bcache.h>

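/*
 * Cache-occupancy cutoffs, as a percentage of buckets in use: above
 * CUTOFF_CACHE_ADD, new data is no longer added to the cache (see
 * check_should_bypass() below).  CUTOFF_CACHE_READA is, per its name,
 * presumably the analogous (stricter) threshold for caching read-ahead
 * data.
 */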
#define CUTOFF_CACHE_ADD	95
#define CUTOFF_CACHE_READA	90

struct kmem_cache *bch_search_cache;

static void bch_data_insert_start(struct closure *cl);

static unsigned int cache_mode(struct cached_dev *dc)
{
	return BDEV_CACHE_MODE(&dc->sb);
}

static bool verify(struct cached_dev *dc)
{
	return dc->verify;
}

static void bio_csum(struct bio *bio, struct bkey *k)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	uint64_t csum = 0;

	bio_for_each_segment(bv, bio, iter) {
		void *d = kmap(bv.bv_page) + bv.bv_offset;

		csum = bch_crc64_update(csum, d, bv.bv_len);
		kunmap(bv.bv_page);
	}

	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
}

/* Insert data into cache */

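/*
 * Descriptive note on the flow below: unless this is a replace operation
 * (an insert from a cache miss), the keys are first added to the journal,
 * then inserted into the btree.  -ESRCH from bch_btree_insert() means a
 * replace key no longer matched (a collision); any other error marks the
 * insert as done with BLK_STS_RESOURCE.  If there is still data left to
 * insert (the keylist filled up earlier), loop back to
 * bch_data_insert_start().
 */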
static void bch_data_insert_keys(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	atomic_t *journal_ref = NULL;
	struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
	int ret;

	if (!op->replace)
		journal_ref = bch_journal(op->c, &op->insert_keys,
					  op->flush_journal ? cl : NULL);

	ret = bch_btree_insert(op->c, &op->insert_keys,
			       journal_ref, replace_key);
	if (ret == -ESRCH) {
		op->replace_collision = true;
	} else if (ret) {
		op->status = BLK_STS_RESOURCE;
		op->insert_data_done = true;
	}

	if (journal_ref)
		atomic_dec_bug(journal_ref);

	if (!op->insert_data_done) {
		continue_at(cl, bch_data_insert_start, op->wq);
		return;
	}

	bch_keylist_free(&op->insert_keys);
	closure_return(cl);
}

static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
			       struct cache_set *c)
{
	size_t oldsize = bch_keylist_nkeys(l);
	size_t newsize = oldsize + u64s;

	/*
	 * The journalling code doesn't handle the case where the keys to
	 * insert are bigger than an empty write: if we just return -ENOMEM
	 * here, bch_data_insert_keys() will insert the keys created so far
	 * and finish the rest once the keylist is empty.
	 */
	if (newsize * sizeof(uint64_t) > block_bytes(c->cache) - sizeof(struct jset))
		return -ENOMEM;

	return __bch_keylist_realloc(l, u64s);
}

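/*
 * Bypass path (descriptive note): instead of writing data into the cache,
 * invalidate the range the bio covers by inserting plain
 * KEY(inode, offset, size) keys with no pointers - per the bch_data_insert()
 * comment below, this invalidates whatever the cache holds for that region.
 */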
static void bch_data_invalidate(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio;

	pr_debug("invalidating %i sectors from %llu\n",
		 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);

	while (bio_sectors(bio)) {
		unsigned int sectors = min(bio_sectors(bio),
					   1U << (KEY_SIZE_BITS - 1));

		if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
			goto out;

		bio->bi_iter.bi_sector += sectors;
		bio->bi_iter.bi_size -= sectors << 9;

		bch_keylist_add(&op->insert_keys,
				&KEY(op->inode,
				     bio->bi_iter.bi_sector,
				     sectors));
	}

	op->insert_data_done = true;
	/* release the reference taken with bio_get() in bch_data_insert() */
	bio_put(bio);
out:
	continue_at(cl, bch_data_insert_keys, op->wq);
}

static void bch_data_insert_error(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	/*
	 * Our data write just errored, which means we've got a bunch of keys to
	 * insert that point to data that wasn't successfully written.
	 *
	 * We don't have to insert those keys but we still have to invalidate
	 * that region of the cache - so, if we just strip off all the pointers
	 * from the keys we'll accomplish just that.
	 */

	struct bkey *src = op->insert_keys.keys, *dst = op->insert_keys.keys;

	while (src != op->insert_keys.top) {
		struct bkey *n = bkey_next(src);

		SET_KEY_PTRS(src, 0);
		memmove(dst, src, bkey_bytes(src));

		dst = bkey_next(dst);
		src = n;
	}

	op->insert_keys.top = dst;

	bch_data_insert_keys(cl);
}

static void bch_data_insert_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	if (bio->bi_status) {
		/* TODO: We could try to recover from this. */
		if (op->writeback)
			op->status = bio->bi_status;
		else if (!op->replace)
			set_closure_fn(cl, bch_data_insert_error, op->wq);
		else
			set_closure_fn(cl, NULL, NULL);
	}

	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}

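/*
 * Main data-insert loop (descriptive note): allocate sectors in the cache,
 * split the bio to match what was allocated, point a new key at the
 * allocated space, checksum if enabled, and submit the write; repeat until
 * the whole bio has been consumed.  If the keylist fills up part way
 * through, the keys collected so far are flushed via bch_data_insert_keys()
 * and the loop resumes from there.
 */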
static void bch_data_insert_start(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio, *n;

	if (op->bypass)
		return bch_data_invalidate(cl);

	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
		wake_up_gc(op->c);

	/*
	 * Journal writes are marked REQ_PREFLUSH; if the original write was a
	 * flush, it'll wait on the journal write.
	 */
	bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);

	do {
		unsigned int i;
		struct bkey *k;
		struct bio_set *split = &op->c->bio_split;

		/* 2 u64s for the key header, 1 for the device pointer and 1 for the chksum */
		if (bch_keylist_realloc(&op->insert_keys,
					3 + (op->csum ? 1 : 0),
					op->c)) {
			continue_at(cl, bch_data_insert_keys, op->wq);
			return;
		}

		k = op->insert_keys.top;
		bkey_init(k);
		SET_KEY_INODE(k, op->inode);
		SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);

		if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
				       op->write_point, op->write_prio,
				       op->writeback))
			goto err;

		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);

		n->bi_end_io = bch_data_insert_endio;
		n->bi_private = cl;

		if (op->writeback) {
			SET_KEY_DIRTY(k, true);

			for (i = 0; i < KEY_PTRS(k); i++)
				SET_GC_MARK(PTR_BUCKET(op->c, k, i),
					    GC_MARK_DIRTY);
		}

		SET_KEY_CSUM(k, op->csum);
		if (KEY_CSUM(k))
			bio_csum(n, k);

		trace_bcache_cache_insert(k);
		bch_keylist_push(&op->insert_keys);

		bio_set_op_attrs(n, REQ_OP_WRITE, 0);
		bch_submit_bbio(n, op->c, k, 0);
	} while (n != bio);

	op->insert_data_done = true;
	continue_at(cl, bch_data_insert_keys, op->wq);
	return;
err:
	/* bch_alloc_sectors() blocks if op->writeback is true */
	BUG_ON(op->writeback);

	/*
	 * But if it's not a writeback write we'd rather just bail out if
	 * there aren't any buckets ready to write to - it might take a while
	 * and we might be starving btree writes for gc or something.
	 */

	if (!op->replace) {
		/*
		 * Writethrough write: We can't complete the write until we've
		 * updated the index. But we don't want to delay the write while
		 * we wait for buckets to be freed up, so just invalidate the
		 * rest of the write.
		 */
		op->bypass = true;
		return bch_data_invalidate(cl);
	} else {
		/*
		 * From a cache miss, we can just insert the keys for the data
		 * we have written or bail out if we didn't do anything.
		 */
		op->insert_data_done = true;
		bio_put(bio);

		if (!bch_keylist_empty(&op->insert_keys))
			continue_at(cl, bch_data_insert_keys, op->wq);
		else
			closure_return(cl);
	}
}

/**
 * bch_data_insert - stick some data in the cache
 * @cl: closure pointer.
 *
 * This is the starting point for any data to end up in a cache device; it could
 * be from a normal write, or a writeback write, or a write to a flash only
 * volume - it's also used by the moving garbage collector to compact data in
 * mostly empty buckets.
 *
 * It first writes the data to the cache, creating a list of keys to be inserted
 * (if the data had to be fragmented there will be multiple keys); after the
 * data is written it calls bch_journal, and after the keys have been added to
 * the next journal write they're inserted into the btree.
 *
 * It inserts the data in op->bio; bi_sector is used for the key offset,
 * and op->inode is used for the key inode.
 *
 * If op->bypass is true, instead of inserting the data it invalidates the
 * region of the cache represented by op->bio and op->inode.
 */
void bch_data_insert(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	trace_bcache_write(op->c, op->inode, op->bio,
			   op->writeback, op->bypass);

	bch_keylist_init(&op->insert_keys);
	bio_get(op->bio);
	bch_data_insert_start(cl);
}

/*
 * Congested? Return 0 (not congested) or the limit (in sectors)
 * beyond which we should bypass the cache due to congestion.
 */
unsigned int bch_get_congested(const struct cache_set *c)
{
	int i;

	if (!c->congested_read_threshold_us &&
	    !c->congested_write_threshold_us)
		return 0;

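	/*
	 * Heuristic, as read from the code below: 'i' starts as the time
	 * since the last congestion update, in roughly millisecond units
	 * (microseconds / 1024).  c->congested is negative while the set
	 * is considered congested, so once enough time has passed the sum
	 * goes non-negative and we report "not congested".  Otherwise the
	 * remaining deficit is shifted into the 0..CONGESTED_MAX range,
	 * rescaled, and a little random jitter is subtracted so requests
	 * don't all flip to bypassing at exactly the same point.
	 */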
	i = (local_clock_us() - c->congested_last_us) / 1024;
	if (i < 0)
		return 0;

	i += atomic_read(&c->congested);
	if (i >= 0)
		return 0;

	i += CONGESTED_MAX;

	if (i > 0)
		i = fract_exp_two(i, 6);

	i -= hweight32(get_random_u32());

	return i > 0 ? i : 1;
}

static void add_sequential(struct task_struct *t)
{
	ewma_add(t->sequential_io_avg,
		 t->sequential_io, 8, 0);

	t->sequential_io = 0;
}

static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
{
	return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}

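/*
 * Decide whether a bio should bypass the cache entirely (descriptive note):
 * bypass if the device is detaching, the cache is nearly full, or this is a
 * discard; honour the cache mode and the read-ahead policy; bypass
 * unaligned I/O; and finally track sequential I/O per task with a small
 * hash/LRU of recent request end positions (dc->io_hash / dc->io_lru), so
 * that large sequential streams - or anything past the congestion limit -
 * skip the cache.
 */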
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
	struct cache_set *c = dc->disk.c;
	unsigned int mode = cache_mode(dc);
	unsigned int sectors, congested;
	struct task_struct *task = current;
	struct io *i;

	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
	    (bio_op(bio) == REQ_OP_DISCARD))
		goto skip;

	if (mode == CACHE_MODE_NONE ||
	    (mode == CACHE_MODE_WRITEAROUND &&
	     op_is_write(bio_op(bio))))
		goto skip;

	/*
	 * If the bio is for read-ahead or background IO, whether to bypass
	 * it depends on the following:
	 * - If the IO is for metadata, always cache it; no bypass.
	 * - If the IO is not metadata, check dc->cache_readahead_policy:
	 *      BCH_CACHE_READA_ALL: cache it, don't bypass
	 *      BCH_CACHE_READA_META_ONLY: don't cache it, bypass
	 * That is, read-ahead requests for metadata always get cached
	 * (eg, for gfs2 or xfs).
	 */
	if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
		if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
		    (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
			goto skip;
	}

	if (bio->bi_iter.bi_sector & (c->cache->sb.block_size - 1) ||
	    bio_sectors(bio) & (c->cache->sb.block_size - 1)) {
		pr_debug("skipping unaligned io\n");
		goto skip;
	}

	if (bypass_torture_test(dc)) {
		if ((get_random_int() & 3) == 3)
			goto skip;
		else
			goto rescale;
	}

	congested = bch_get_congested(c);
	if (!congested && !dc->sequential_cutoff)
		goto rescale;

	spin_lock(&dc->io_lock);

	hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
		if (i->last == bio->bi_iter.bi_sector &&
		    time_before(jiffies, i->jiffies))
			goto found;

	i = list_first_entry(&dc->io_lru, struct io, lru);

	add_sequential(task);
	i->sequential = 0;
found:
	if (i->sequential + bio->bi_iter.bi_size > i->sequential)
		i->sequential += bio->bi_iter.bi_size;

	i->last = bio_end_sector(bio);
	i->jiffies = jiffies + msecs_to_jiffies(5000);
	task->sequential_io = i->sequential;

	hlist_del(&i->hash);
	hlist_add_head(&i->hash, iohash(dc, i->last));
	list_move_tail(&i->lru, &dc->io_lru);

	spin_unlock(&dc->io_lock);

	sectors = max(task->sequential_io,
		      task->sequential_io_avg) >> 9;

	if (dc->sequential_cutoff &&
	    sectors >= dc->sequential_cutoff >> 9) {
		trace_bcache_bypass_sequential(bio);
		goto skip;
	}

	if (congested && sectors >= congested) {
		trace_bcache_bypass_congested(bio);
		goto skip;
	}

rescale:
	bch_rescale_priorities(c, bio_sectors(bio));
	return false;
skip:
	bch_mark_sectors_bypassed(c, dc, bio_sectors(bio));
	return true;
}

/* Cache lookup */

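/*
 * struct search carries the state for a single request as it moves through
 * the closures below: the original bio, a clone (bio) used for cache
 * lookups, accounting information, and the embedded btree_op/data_insert_op
 * that the various callbacks container_of() their way back from.
 */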
struct search {
	/* Stack frame for bio_complete */
	struct closure cl;

	struct bbio bio;
	struct bio *orig_bio;
	struct bio *cache_miss;
	struct bcache_device *d;

	unsigned int insert_bio_sectors;
	unsigned int recoverable:1;
	unsigned int write:1;
	unsigned int read_dirty_data:1;
	unsigned int cache_missed:1;

	struct hd_struct *part;
	unsigned long start_time;

	struct btree_op op;
	struct data_insert_op iop;
};

static void bch_cache_read_endio(struct bio *bio)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct closure *cl = bio->bi_private;
	struct search *s = container_of(cl, struct search, cl);

	/*
	 * If the bucket was reused while our bio was in flight, we might have
	 * read the wrong data. Set s->iop.status but leave the bio's status
	 * alone, so the race doesn't get counted as an error against the
	 * cache device - we'll still reread the data from the backing device.
	 */

	if (bio->bi_status)
		s->iop.status = bio->bi_status;
	else if (!KEY_DIRTY(&b->key) &&
		 ptr_stale(s->iop.c, &b->key, 0)) {
		atomic_long_inc(&s->iop.c->cache_read_races);
		s->iop.status = BLK_STS_IOERR;
	}

	bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}

/*
 * Read from a single key, handling the initial cache miss if the key starts in
 * the middle of the bio
 */
static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
{
	struct search *s = container_of(op, struct search, op);
	struct bio *n, *bio = &s->bio.bio;
	struct bkey *bio_key;
	unsigned int ptr;

	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
		return MAP_CONTINUE;

	if (KEY_INODE(k) != s->iop.inode ||
	    KEY_START(k) > bio->bi_iter.bi_sector) {
		unsigned int bio_sectors = bio_sectors(bio);
		unsigned int sectors = KEY_INODE(k) == s->iop.inode
			? min_t(uint64_t, INT_MAX,
				KEY_START(k) - bio->bi_iter.bi_sector)
			: INT_MAX;
		int ret = s->d->cache_miss(b, s, bio, sectors);

		if (ret != MAP_CONTINUE)
			return ret;

		/* if this was a complete miss we shouldn't get here */
		BUG_ON(bio_sectors <= sectors);
	}

	if (!KEY_SIZE(k))
		return MAP_CONTINUE;

	/* XXX: figure out best pointer - for multiple cache devices */
	ptr = 0;

	PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;

	if (KEY_DIRTY(k))
		s->read_dirty_data = true;

	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
			   GFP_NOIO, &s->d->bio_split);

	bio_key = &container_of(n, struct bbio, bio)->key;
	bch_bkey_copy_single_ptr(bio_key, k, ptr);

	bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
	bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);

	n->bi_end_io = bch_cache_read_endio;
	n->bi_private = &s->cl;

	/*
	 * The bucket we're reading from might be reused while our bio
	 * is in flight, and we could then end up reading the wrong
	 * data.
	 *
	 * We guard against this by checking (in bch_cache_read_endio())
	 * if the pointer is stale again; if so, we treat it as an error
	 * and reread from the backing device (but we don't pass that
	 * error up anywhere).
	 */

	__bch_submit_bbio(n, b->c);
	return n == bio ? MAP_DONE : MAP_CONTINUE;
}

static void cache_lookup(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, iop.cl);
	struct bio *bio = &s->bio.bio;
	struct cached_dev *dc;
	int ret;

	bch_btree_op_init(&s->op, -1);

	ret = bch_btree_map_keys(&s->op, s->iop.c,
				 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
				 cache_lookup_fn, MAP_END_KEY);
	if (ret == -EAGAIN) {
		continue_at(cl, cache_lookup, bcache_wq);
		return;
	}

	/*
	 * We might hit an error while searching the btree; if that happens we
	 * get a negative ret. In this scenario we should not recover data from
	 * the backing device (when the cache device is dirty) because we don't
	 * know whether all the bkeys the read request covers are clean.
	 *
	 * When that happens, s->iop.status is still the initial value it had
	 * before we submitted s->bio.bio.
	 */
	if (ret < 0) {
		BUG_ON(ret == -EINTR);
		if (s->d && s->d->c &&
		    !UUID_FLASH_ONLY(&s->d->c->uuids[s->d->id])) {
			dc = container_of(s->d, struct cached_dev, disk);
			if (dc && atomic_read(&dc->has_dirty))
				s->recoverable = false;
		}
		if (!s->iop.status)
			s->iop.status = BLK_STS_IOERR;
	}

	closure_return(cl);
}

/* Common code for the make_request functions */

static void request_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	if (bio->bi_status) {
		struct search *s = container_of(cl, struct search, cl);

		s->iop.status = bio->bi_status;
		/* Only cache read errors are recoverable */
		s->recoverable = false;
	}

	bio_put(bio);
	closure_put(cl);
}

static void backing_request_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	if (bio->bi_status) {
		struct search *s = container_of(cl, struct search, cl);
		struct cached_dev *dc = container_of(s->d,
						     struct cached_dev, disk);
		/*
		 * If a bio has REQ_PREFLUSH for writeback mode, it was
		 * specially assembled in cached_dev_write() for a non-empty
		 * write request which has REQ_PREFLUSH. We don't set
		 * s->iop.status on this failure; the status will be decided
		 * by the result of the bch_data_insert() operation.
		 */
		if (unlikely(s->iop.writeback &&
			     bio->bi_opf & REQ_PREFLUSH)) {
			pr_err("Can't flush %s: returned bi_status %i\n",
			       dc->backing_dev_name, bio->bi_status);
		} else {
			/* set to orig_bio->bi_status in bio_complete() */
			s->iop.status = bio->bi_status;
		}
		s->recoverable = false;
		/* should count I/O error for backing device here */
		bch_count_backing_io_errors(dc, bio);
	}

	bio_put(bio);
	closure_put(cl);
}

static void bio_complete(struct search *s)
{
	if (s->orig_bio) {
		/* Count on bcache device */
		part_end_io_acct(s->part, s->orig_bio, s->start_time);

		trace_bcache_request_end(s->d, s->orig_bio);
		s->orig_bio->bi_status = s->iop.status;
		bio_endio(s->orig_bio);
		s->orig_bio = NULL;
	}
}

static void do_bio_hook(struct search *s,
			struct bio *orig_bio,
			bio_end_io_t *end_io_fn)
{
	struct bio *bio = &s->bio.bio;

	bio_init(bio, NULL, 0);
	__bio_clone_fast(bio, orig_bio);
	/*
	 * bi_end_io can be set separately somewhere else, e.g. the
	 * variants in,
	 * - cache_bio->bi_end_io from cached_dev_cache_miss()
	 * - n->bi_end_io from cache_lookup_fn()
	 */
	bio->bi_end_io = end_io_fn;
	bio->bi_private = &s->cl;

	bio_cnt_set(bio, 3);
}

static void search_free(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	atomic_dec(&s->iop.c->search_inflight);

	if (s->iop.bio)
		bio_put(s->iop.bio);

	bio_complete(s);
	closure_debug_destroy(cl);
	mempool_free(s, &s->iop.c->search);
}

static inline struct search *search_alloc(struct bio *bio,
					  struct bcache_device *d)
{
	struct search *s;

	s = mempool_alloc(&d->c->search, GFP_NOIO);

	closure_init(&s->cl, NULL);
	do_bio_hook(s, bio, request_endio);
	atomic_inc(&d->c->search_inflight);

	s->orig_bio = bio;
	s->cache_miss = NULL;
	s->cache_missed = 0;
	s->d = d;
	s->recoverable = 1;
	s->write = op_is_write(bio_op(bio));
	s->read_dirty_data = 0;
	/* Count on the bcache device */
	s->start_time = part_start_io_acct(d->disk, &s->part, bio);
	s->iop.c = d->c;
	s->iop.bio = NULL;
	s->iop.inode = d->id;
	s->iop.write_point = hash_long((unsigned long) current, 16);
	s->iop.write_prio = 0;
	s->iop.status = 0;
	s->iop.flags = 0;
	s->iop.flush_journal = op_is_flush(bio->bi_opf);
	s->iop.wq = bcache_wq;

	return s;
}

/* Cached devices */

static void cached_dev_bio_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	cached_dev_put(dc);
	search_free(cl);
}

/* Process reads */

static void cached_dev_read_error_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->iop.replace_collision)
		bch_mark_cache_miss_collision(s->iop.c, s->d);

	if (s->iop.bio)
		bio_free_pages(s->iop.bio);

	cached_dev_bio_complete(cl);
}

static void cached_dev_read_error(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio *bio = &s->bio.bio;

	/*
	 * If the read request hit dirty data (s->read_dirty_data is true),
	 * then recovering a failed read from the cache device might return
	 * stale data. So read failure recovery is only permitted when the
	 * read request hit clean data in the cache device, or when a cache
	 * read race happened.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) if (s->recoverable && !s->read_dirty_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) /* Retry from the backing device: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) trace_bcache_read_retry(s->orig_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) s->iop.status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) do_bio_hook(s, s->orig_bio, backing_request_endio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) /* XXX: invalidate cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) closure_bio_submit(s->iop.c, bio, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) continue_at(cl, cached_dev_read_error_done, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) static void cached_dev_cache_miss_done(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) struct bcache_device *d = s->d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if (s->iop.replace_collision)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) bch_mark_cache_miss_collision(s->iop.c, s->d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (s->iop.bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) bio_free_pages(s->iop.bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) cached_dev_bio_complete(cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) closure_put(&d->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
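/*
 * Finish a read that either missed the cache or needs verification: copy any
 * cache-miss data from cache_bio's bounce buffers into the original bio,
 * verify if enabled, complete the request, and (unless the cache set is
 * stopping) insert the freshly read data into the cache.
 */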
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) static void cached_dev_read_done(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) * We had a cache miss; cache_bio now contains data ready to be inserted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) * into the cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) * First, we copy the data we just read from cache_bio's bounce buffers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * to the buffers the original bio pointed to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) if (s->iop.bio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) bio_reset(s->iop.bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) s->iop.bio->bi_iter.bi_sector =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) s->cache_miss->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) bio_copy_dev(s->iop.bio, s->cache_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) bch_bio_map(s->iop.bio, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) bio_copy_data(s->cache_miss, s->iop.bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) bio_put(s->cache_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) s->cache_miss = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (verify(dc) && s->recoverable && !s->read_dirty_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) bch_data_verify(dc, s->orig_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) closure_get(&dc->disk.cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) bio_complete(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (s->iop.bio &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) BUG_ON(!s->iop.replace);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) continue_at(cl, cached_dev_cache_miss_done, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
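/*
 * Runs once the cache lookup (and any resulting I/O) completes, which may
 * happen in bio completion context: update cache hit/miss accounting, then
 * punt to the error, read-done or plain completion path.
 */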
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) static void cached_dev_read_done_bh(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) bch_mark_cache_accounting(s->iop.c, s->d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) !s->cache_missed, s->iop.bypass);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (s->iop.status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) else if (s->iop.bio || verify(dc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
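/*
 * Handle the part of a read that wasn't found in the cache: optionally
 * extend the miss with readahead, reserve the range with a replace key, and
 * allocate cache_bio so the data read from the backing device can also be
 * inserted into the cache.
 */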
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) static int cached_dev_cache_miss(struct btree *b, struct search *s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct bio *bio, unsigned int sectors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) int ret = MAP_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) unsigned int reada = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) struct bio *miss, *cache_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) s->cache_missed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) if (s->cache_miss || s->iop.bypass) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) goto out_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (!(bio->bi_opf & REQ_RAHEAD) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) reada = min_t(sector_t, dc->readahead >> 9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) get_capacity(bio->bi_disk) - bio_end_sector(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) s->iop.replace_key = KEY(s->iop.inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) bio->bi_iter.bi_sector + s->insert_bio_sectors,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) s->insert_bio_sectors);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) s->iop.replace = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) /* btree_search_recurse()'s btree iterator is no good anymore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) ret = miss == bio ? MAP_DONE : -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) cache_bio = bio_alloc_bioset(GFP_NOWAIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) &dc->disk.bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (!cache_bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) goto out_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) bio_copy_dev(cache_bio, miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) cache_bio->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) cache_bio->bi_private = &s->cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) bch_bio_map(cache_bio, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (reada)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) bch_mark_cache_readahead(s->iop.c, s->d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) s->cache_miss = miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) s->iop.bio = cache_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) bio_get(cache_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) closure_bio_submit(s->iop.c, cache_bio, &s->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) out_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) bio_put(cache_bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) out_submit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) miss->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) miss->bi_private = &s->cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) closure_bio_submit(s->iop.c, miss, &s->cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
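/* Look the read up in the btree; cached_dev_read_done_bh() finishes it */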
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) static void cached_dev_read(struct cached_dev *dc, struct search *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct closure *cl = &s->cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) closure_call(&s->iop.cl, cache_lookup, NULL, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) continue_at(cl, cached_dev_read_done_bh, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) /* Process writes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
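/* Drop the writeback lock taken in cached_dev_write() and finish the bio */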
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) static void cached_dev_write_complete(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) up_read_non_owner(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) cached_dev_bio_complete(cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
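/*
 * Decide how to service a write: bypass the cache entirely, write back
 * (dirty data goes only to the cache), or write through (data goes to both
 * the cache and the backing device).
 */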
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) static void cached_dev_write(struct cached_dev *dc, struct search *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) struct closure *cl = &s->cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) struct bio *bio = &s->bio.bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) down_read_non_owner(&dc->writeback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) {
		/*
		 * We overlap with some dirty data undergoing background
		 * writeback; force this write to writeback as well.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) s->iop.bypass = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) s->iop.writeback = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
	/*
	 * Discards aren't _required_ to do anything, so it's OK to bypass
	 * them even when check_overlapping returned true.
	 *
	 * But check_overlapping drops dirty keys for which I/O hasn't
	 * started yet, so we still want to call it.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) if (bio_op(bio) == REQ_OP_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) s->iop.bypass = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (should_writeback(dc, s->orig_bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) cache_mode(dc),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) s->iop.bypass)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) s->iop.bypass = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) s->iop.writeback = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) if (s->iop.bypass) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) s->iop.bio = s->orig_bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) bio_get(s->iop.bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) if (bio_op(bio) == REQ_OP_DISCARD &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) !blk_queue_discard(bdev_get_queue(dc->bdev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) goto insert_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) bio->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) closure_bio_submit(s->iop.c, bio, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) } else if (s->iop.writeback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) bch_writeback_add(dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) s->iop.bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) if (bio->bi_opf & REQ_PREFLUSH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * Also need to send a flush to the backing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) struct bio *flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) flush = bio_alloc_bioset(GFP_NOIO, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) &dc->disk.bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) if (!flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) s->iop.status = BLK_STS_RESOURCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) goto insert_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) bio_copy_dev(flush, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) flush->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) flush->bi_private = cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) closure_bio_submit(s->iop.c, flush, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) bio->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) closure_bio_submit(s->iop.c, bio, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) insert_data:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) continue_at(cl, cached_dev_write_complete, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
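/*
 * Handle a bio with no data (e.g. an empty flush): journal the flush if
 * needed and forward the bio to the backing device.
 */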
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) static void cached_dev_nodata(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) struct bio *bio = &s->bio.bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (s->iop.flush_journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) bch_journal_meta(s->iop.c, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) /* If it's a flush, we send the flush to the backing device too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) bio->bi_end_io = backing_request_endio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) closure_bio_submit(s->iop.c, bio, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) continue_at(cl, cached_dev_bio_complete, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
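/*
 * I/O to a backing device that has no cache attached bypasses bcache
 * entirely, but is still accounted against the bcache device and still
 * contributes to the backing device's error counter.
 */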
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) struct detached_dev_io_private {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) struct bcache_device *d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) unsigned long start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) bio_end_io_t *bi_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) void *bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) struct hd_struct *part;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) static void detached_dev_end_io(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) struct detached_dev_io_private *ddip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) ddip = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) bio->bi_end_io = ddip->bi_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) bio->bi_private = ddip->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
	/* Account the I/O against the bcache device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) part_end_io_acct(ddip->part, bio, ddip->start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (bio->bi_status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) struct cached_dev *dc = container_of(ddip->d,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) struct cached_dev, disk);
		/* Count this I/O error against the backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) bch_count_backing_io_errors(dc, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) kfree(ddip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) bio->bi_end_io(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) struct detached_dev_io_private *ddip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) struct cached_dev *dc = container_of(d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
	/*
	 * No need to call closure_get(&dc->disk.cl) here, because the
	 * upper layer has already opened the bcache device, which took a
	 * reference via closure_get(&dc->disk.cl).
	 */
	ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
	if (!ddip) {
		bio->bi_status = BLK_STS_RESOURCE;
		bio->bi_end_io(bio);
		return;
	}
	ddip->d = d;
	/* Account the I/O against the bcache device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) ddip->bi_end_io = bio->bi_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) ddip->bi_private = bio->bi_private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) bio->bi_end_io = detached_dev_end_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) bio->bi_private = ddip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) if ((bio_op(bio) == REQ_OP_DISCARD) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) !blk_queue_discard(bdev_get_queue(dc->bdev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) bio->bi_end_io(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) submit_bio_noacct(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
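/*
 * New foreground I/O arrived while writeback was running at the maximum
 * rate: drop the writeback rate back to the minimum so incoming requests
 * aren't slowed down by writeback traffic.
 */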
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) static void quit_max_writeback_rate(struct cache_set *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) struct cached_dev *this_dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) struct bcache_device *d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) struct cached_dev *dc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
	/*
	 * The mutex bch_register_lock may be contended by other parallel
	 * requesters, or by attach/detach operations on other backing
	 * devices. Waiting for the mutex may increase I/O request latency
	 * by seconds or more. To avoid such a situation, if mutex_trylock()
	 * fails, only the writeback rate of the current cached device is
	 * set to 1, and __update_writeback_rate() will decide the writeback
	 * rate of the other cached devices (remember that c->idle_counter
	 * is already 0).
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) if (mutex_trylock(&bch_register_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) for (i = 0; i < c->devices_max_used; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (!c->devices[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) if (UUID_FLASH_ONLY(&c->uuids[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) d = c->devices[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) dc = container_of(d, struct cached_dev, disk);
			/*
			 * Set the writeback rate to the default minimum
			 * value, then let update_writeback_rate() decide
			 * the upcoming rate.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) atomic_long_set(&dc->writeback_rate.rate, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) mutex_unlock(&bch_register_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) atomic_long_set(&this_dc->writeback_rate.rate, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) /* Cached devices - read & write stuff */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
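/*
 * Entry point for bios submitted to a bcache device that has a backing
 * device: fail fast if I/O is disabled, reset the cache set's idle state,
 * then route the bio through the cached read/write paths or, if no cache is
 * attached, straight to the backing device.
 */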
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) blk_qc_t cached_dev_submit_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) struct search *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) struct bcache_device *d = bio->bi_disk->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) struct cached_dev *dc = container_of(d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) int rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) dc->io_disable)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) bio_endio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (likely(d->c)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) if (atomic_read(&d->c->idle_counter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) atomic_set(&d->c->idle_counter, 0);
		/*
		 * If at_max_writeback_rate of the cache set is true and a
		 * new I/O comes in, quit the max writeback rate of all
		 * cached devices attached to this cache set, and set
		 * at_max_writeback_rate to false.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) atomic_set(&d->c->at_max_writeback_rate, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) quit_max_writeback_rate(d->c, dc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) bio_set_dev(bio, dc->bdev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) bio->bi_iter.bi_sector += dc->sb.data_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) if (cached_dev_get(dc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) s = search_alloc(bio, d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) trace_bcache_request_start(s->d, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) if (!bio->bi_iter.bi_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * can't call bch_journal_meta from under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * submit_bio_noacct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) continue_at_nobarrier(&s->cl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) cached_dev_nodata,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) bcache_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) s->iop.bypass = check_should_bypass(dc, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) if (rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) cached_dev_write(dc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) cached_dev_read(dc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) /* I/O request sent to backing device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) detached_dev_do_request(d, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
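/* Forward ioctls to the backing device, unless I/O has been disabled */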
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) unsigned int cmd, unsigned long arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) struct cached_dev *dc = container_of(d, struct cached_dev, disk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) if (dc->io_disable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) void bch_cached_dev_request_init(struct cached_dev *dc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) dc->disk.cache_miss = cached_dev_cache_miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) dc->disk.ioctl = cached_dev_ioctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) /* Flash backed devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
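/*
 * Flash-only volumes have no backing device, so a cache miss is simply
 * zero-filled.
 */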
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int flash_dev_cache_miss(struct btree *b, struct search *s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) struct bio *bio, unsigned int sectors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) unsigned int bytes = min(sectors, bio_sectors(bio)) << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) swap(bio->bi_iter.bi_size, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) zero_fill_bio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) swap(bio->bi_iter.bi_size, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) bio_advance(bio, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (!bio->bi_iter.bi_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) return MAP_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) return MAP_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) static void flash_dev_nodata(struct closure *cl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) struct search *s = container_of(cl, struct search, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) if (s->iop.flush_journal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) bch_journal_meta(s->iop.c, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) continue_at(cl, search_free, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
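/*
 * Entry point for bios submitted to a flash-only volume: writes are inserted
 * straight into the cache set, reads are served by a btree lookup, and empty
 * flushes only need a journal write.
 */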
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) blk_qc_t flash_dev_submit_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) struct search *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct closure *cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct bcache_device *d = bio->bi_disk->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) bio->bi_status = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) bio_endio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) s = search_alloc(bio, d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) cl = &s->cl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) bio = &s->bio.bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) trace_bcache_request_start(s->d, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (!bio->bi_iter.bi_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) * can't call bch_journal_meta from under submit_bio_noacct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) continue_at_nobarrier(&s->cl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) flash_dev_nodata,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) bcache_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) } else if (bio_data_dir(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) &KEY(d->id, bio->bi_iter.bi_sector, 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) &KEY(d->id, bio_end_sector(bio), 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) s->iop.bypass = (bio_op(bio) == REQ_OP_DISCARD) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) s->iop.writeback = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) s->iop.bio = bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) closure_call(&s->iop.cl, cache_lookup, NULL, cl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) continue_at(cl, search_free, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) unsigned int cmd, unsigned long arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) return -ENOTTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) void bch_flash_dev_request_init(struct bcache_device *d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) d->cache_miss = flash_dev_cache_miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) d->ioctl = flash_dev_ioctl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) void bch_request_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) kmem_cache_destroy(bch_search_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) int __init bch_request_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) bch_search_cache = KMEM_CACHE(search, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) if (!bch_search_cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) }