// SPDX-License-Identifier: GPL-2.0
/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

#include <linux/blkdev.h>

/* Bios with headers */

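/*
 * A struct bbio wraps a struct bio together with the bkey it was issued
 * for and a submit timestamp, so the completion path can find the cache
 * device backing the IO and measure its latency.  bch_bbio_free() simply
 * returns the wrapper to the cache set's metadata bio mempool.
 */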
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	mempool_free(b, &c->bio_meta);
}

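/*
 * Allocate a bbio from the cache set's metadata mempool.  GFP_NOIO is used
 * because these allocations happen on the IO path and must not recurse back
 * into the block layer via reclaim.  The bio gets enough inline vecs for one
 * metadata bucket; the caller fills in b->key (directly or through
 * bch_submit_bbio()) before submission.
 */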
struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));

	return bio;
}

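/*
 * Submit a bbio whose key has already been filled in: the target sector and
 * cache device are taken from the first pointer of b->key, and the submit
 * time is recorded for the latency/congestion accounting done in
 * bch_bbio_count_io_errors().
 */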
void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
	bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);

	b->submit_time_us = local_clock_us();
	closure_bio_submit(c, bio, bio->bi_private);
}

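/*
 * Convenience wrapper around __bch_submit_bbio(): copy a single pointer
 * (ptr) out of key k into the bbio's private key, then submit.
 */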
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned int ptr)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bch_bkey_copy_single_ptr(&b->key, k, ptr);
	__bch_submit_bbio(bio, c);
}

/* IO errors */

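/*
 * Count an IO error against the backing device of a cached_dev.  Once
 * dc->error_limit is reached the device is handed over to
 * bch_cached_dev_error() for error handling.  Failed readahead bios are
 * deliberately not counted; see the comment in the function body.
 */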
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
	unsigned int errors;

	WARN_ONCE(!dc, "NULL pointer of struct cached_dev");

	/*
	 * Read-ahead requests on a degraded and recovering md raid
	 * (e.g. raid6) device might be failed immediately by the md
	 * raid code, which is not a real hardware media failure. So
	 * we shouldn't count failed REQ_RAHEAD bios towards dc->io_errors.
	 */
	if (bio->bi_opf & REQ_RAHEAD) {
		pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
				    dc->backing_dev_name);
		return;
	}

	errors = atomic_add_return(1, &dc->io_errors);
	if (errors < dc->error_limit)
		pr_err("%s: IO error on backing device, unrecoverable\n",
			dc->backing_dev_name);
	else
		bch_cached_dev_error(dc);
}

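/*
 * Count (and decay) IO errors on a cache device.
 *
 * ca->io_errors is a decaying counter: every time error_decay IOs complete,
 * the accumulated error count is multiplied by 127/128.  As the comment
 * below notes, that gives errors a halflife of roughly 88 * error_decay IOs
 * (log(1/2) / log(127/128) ~= 88.4).  Errors are stored scaled up by
 * 1 << IO_ERROR_SHIFT, presumably so the decay still has an effect when only
 * a few errors have been seen.  Both the io_count decrement and the rescale
 * use cmpxchg loops so that concurrent completions stay consistent.
 *
 * When the (descaled) error count reaches ca->set->error_limit, the whole
 * cache set is flagged as failing via bch_cache_set_error().
 */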
void bch_count_io_errors(struct cache *ca,
			 blk_status_t error,
			 int is_read,
			 const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 */

	if (ca->set->error_decay) {
		unsigned int count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned int errors;
			unsigned int old = count;
			unsigned int new = count - ca->set->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

	if (error) {
		unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
							&ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s%s\n",
			       ca->cache_dev_name, m,
			       is_read ? ", recovering." : ".");
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s\n",
					    ca->cache_dev_name, m);
	}
}

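/*
 * Completion-side accounting for a bbio.  Besides forwarding the error to
 * bch_count_io_errors(), this updates the cache set's congestion estimate:
 * if the IO took longer than the configured read/write threshold, its
 * latency (approximated in ms as us / 1024) is subtracted from c->congested,
 * clamped so the counter cannot drop below -CONGESTED_MAX; fast IOs slowly
 * raise it back towards zero.  The resulting value is consumed elsewhere,
 * e.g. when deciding whether to bypass the cache.
 */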
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      blk_status_t error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);
	int is_read = (bio_data_dir(bio) == READ ? 1 : 0);

	unsigned int threshold = op_is_write(bio_op(bio))
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned int t = local_clock_us();
		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;

			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, is_read, m);
}

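/*
 * Common endio path for bbios: account errors and latency, drop the bio
 * reference, and put the closure that the submitter stashed in bi_private.
 */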
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    blk_status_t error, const char *m)
{
	struct closure *cl = bio->bi_private;

	bch_bbio_count_io_errors(c, bio, error, m);
	bio_put(bio);
	closure_put(cl);
}