// SPDX-License-Identifier: GPL-2.0
/*
 * background writeback - scan btree for dirty data and write it to the backing
 * device
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "writeback.h"

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched/clock.h>
#include <trace/events/bcache.h>

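/*
 * Arm the post-writeback garbage collection trigger: once the user has
 * enabled BCH_ENABLE_AUTO_GC and bucket usage (gc_stats.in_use) reaches
 * BCH_AUTO_GC_DIRTY_THRESHOLD, set BCH_DO_AUTO_GC so the writeback thread
 * can wake the gc thread after writeback finishes.
 */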
static void update_gc_after_writeback(struct cache_set *c)
{
        if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
            c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
                return;

        c->gc_after_writeback |= BCH_DO_AUTO_GC;
}

/* Rate limiting */
static uint64_t __calc_target_rate(struct cached_dev *dc)
{
        struct cache_set *c = dc->disk.c;

        /*
         * This is the size of the cache, minus the amount used for
         * flash-only devices
         */
        uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
                        atomic_long_read(&c->flash_dev_dirty_sectors);

        /*
         * Unfortunately there is no control of global dirty data. If the
         * user states that they want 10% dirty data in the cache, and has,
         * e.g., 5 backing volumes of equal size, we try and ensure each
         * backing volume uses about 2% of the cache for dirty data.
         */
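        /*
         * Illustrative figures (not taken from any real setup): a 100 GiB
         * backing device attached alongside 400 GiB of other backing devices
         * gets bdev_share ~= 0.2 in WRITEBACK_SHARE_SHIFT fixed point, so the
         * target returned below works out to roughly 20% of the cache-wide
         * dirty target.
         */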
        uint32_t bdev_share =
                div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT,
                          c->cached_dev_sectors);

        uint64_t cache_dirty_target =
                div_u64(cache_sectors * dc->writeback_percent, 100);

        /* Ensure each backing dev gets at least one dirty share */
        if (bdev_share < 1)
                bdev_share = 1;

        return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT;
}

static void __update_writeback_rate(struct cached_dev *dc)
{
        /*
         * PI controller:
         * Figures out the amount that should be written per second.
         *
         * First, the error (number of sectors that are dirty beyond our
         * target) is calculated. The error is accumulated (numerically
         * integrated).
         *
         * Then, the proportional value and integral value are scaled
         * based on configured values. These are stored as inverses to
         * avoid fixed point math and to make configuration easy-- e.g.
         * the default value of 40 for writeback_rate_p_term_inverse
         * attempts to write at a rate that would retire all the dirty
         * blocks in 40 seconds.
         *
         * The writeback_rate_i_term_inverse value of 10000 means that
         * 1/10000th of the error is accumulated in the integral term per
         * second. This acts as a slow, long-term average that is not
         * subject to variations in usage like the p term.
         */
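        /*
         * Illustrative numbers only (not measured from any device): with the
         * default p_term_inverse of 40, an error of 80000 dirty sectors over
         * target contributes 80000 / 40 = 2000 sectors/sec proportionally.
         * If that error persists, each update adds error * update_seconds to
         * the integral, and dividing by the default i_term_inverse of 10000
         * slowly raises the rate until the backlog drains.
         */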
        int64_t target = __calc_target_rate(dc);
        int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
        int64_t error = dirty - target;
        int64_t proportional_scaled =
                div_s64(error, dc->writeback_rate_p_term_inverse);
        int64_t integral_scaled;
        uint32_t new_rate;

        if ((error < 0 && dc->writeback_rate_integral > 0) ||
            (error > 0 && time_before64(local_clock(),
                         dc->writeback_rate.next + NSEC_PER_MSEC))) {
                /*
                 * Only decrease the integral term if it's more than
                 * zero. Only increase the integral term if the device
                 * is keeping up. (Don't wind up the integral
                 * ineffectively in either case).
                 *
                 * It's necessary to scale this by
                 * writeback_rate_update_seconds to keep the integral
                 * term dimensioned properly.
                 */
                dc->writeback_rate_integral += error *
                        dc->writeback_rate_update_seconds;
        }

        integral_scaled = div_s64(dc->writeback_rate_integral,
                        dc->writeback_rate_i_term_inverse);

        new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
                        dc->writeback_rate_minimum, NSEC_PER_SEC);

        dc->writeback_rate_proportional = proportional_scaled;
        dc->writeback_rate_integral_scaled = integral_scaled;
        dc->writeback_rate_change = new_rate -
                        atomic_long_read(&dc->writeback_rate.rate);
        atomic_long_set(&dc->writeback_rate.rate, new_rate);
        dc->writeback_rate_target = target;
}

static bool set_at_max_writeback_rate(struct cache_set *c,
                                      struct cached_dev *dc)
{
        /* Don't set max writeback rate if it is disabled */
        if (!c->idle_max_writeback_rate_enabled)
                return false;

        /* Don't set max writeback rate if gc is running */
        if (!c->gc_mark_valid)
                return false;
        /*
         * Idle_counter is increased every time update_writeback_rate() is
         * called. If all backing devices attached to the same cache set have
         * identical dc->writeback_rate_update_seconds values, it takes about
         * 6 rounds of update_writeback_rate() on each backing device before
         * c->at_max_writeback_rate is set to 1, and then the maximum
         * writeback rate is set on each dc->writeback_rate.rate.
         * In order to avoid the extra locking cost of counting the exact
         * number of dirty cached devices, c->attached_dev_nr is used to
         * calculate the idle threshold. It might be bigger if not all cached
         * devices are in writeback mode, but it still works well with a
         * limited number of extra rounds of update_writeback_rate().
         */
        if (atomic_inc_return(&c->idle_counter) <
            atomic_read(&c->attached_dev_nr) * 6)
                return false;

        if (atomic_read(&c->at_max_writeback_rate) != 1)
                atomic_set(&c->at_max_writeback_rate, 1);

        atomic_long_set(&dc->writeback_rate.rate, INT_MAX);

        /* keep writeback_rate_target as existing value */
        dc->writeback_rate_proportional = 0;
        dc->writeback_rate_integral_scaled = 0;
        dc->writeback_rate_change = 0;

        /*
         * Check c->idle_counter and c->at_max_writeback_rate again in case
         * new I/O arrives before set_at_max_writeback_rate() returns. In that
         * case the writeback rate has already been set back to 1, and its new
         * value should be decided via __update_writeback_rate().
         */
        if ((atomic_read(&c->idle_counter) <
             atomic_read(&c->attached_dev_nr) * 6) ||
            !atomic_read(&c->at_max_writeback_rate))
                return false;

        return true;
}

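/*
 * Delayed-work handler that periodically re-evaluates the writeback rate.
 * BCACHE_DEV_RATE_DW_RUNNING brackets the handler so that teardown code can
 * tell whether this work is still executing before calling
 * cancel_delayed_work_sync(); the work re-arms itself every
 * writeback_rate_update_seconds while writeback keeps running.
 */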
static void update_writeback_rate(struct work_struct *work)
{
        struct cached_dev *dc = container_of(to_delayed_work(work),
                                             struct cached_dev,
                                             writeback_rate_update);
        struct cache_set *c = dc->disk.c;

        /*
         * should check BCACHE_DEV_RATE_DW_RUNNING before calling
         * cancel_delayed_work_sync().
         */
        set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
        /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
        smp_mb__after_atomic();

        /*
         * CACHE_SET_IO_DISABLE might be set via sysfs interface,
         * check it here too.
         */
        if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
            test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
                /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
                smp_mb__after_atomic();
                return;
        }

        if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
                /*
                 * If the whole cache set is idle, set_at_max_writeback_rate()
                 * will set the writeback rate to its maximum. Then it is
                 * unnecessary to update the writeback rate for an idle cache
                 * set that is already running at the maximum rate.
                 */
                if (!set_at_max_writeback_rate(c, dc)) {
                        down_read(&dc->writeback_lock);
                        __update_writeback_rate(dc);
                        update_gc_after_writeback(c);
                        up_read(&dc->writeback_lock);
                }
        }

        /*
         * CACHE_SET_IO_DISABLE might be set via sysfs interface,
         * check it here too.
         */
        if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
            !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                schedule_delayed_work(&dc->writeback_rate_update,
                              dc->writeback_rate_update_seconds * HZ);
        }

        /*
         * should check BCACHE_DEV_RATE_DW_RUNNING before calling
         * cancel_delayed_work_sync().
         */
        clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
        /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
        smp_mb__after_atomic();
}

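/*
 * How long to sleep after issuing @sectors of writeback. No delay (i.e.
 * write back at full speed) while the device is detaching or when
 * writeback_percent is 0; otherwise ask the rate limiter.
 */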
static unsigned int writeback_delay(struct cached_dev *dc,
                                    unsigned int sectors)
{
        if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
            !dc->writeback_percent)
                return 0;

        return bch_next_delay(&dc->writeback_rate, sectors);
}

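/*
 * State for one in-flight writeback I/O: the closure tracks the read from
 * the cache and the subsequent write to the backing device, and @sequence
 * is compared against dc->writeback_sequence_next to keep backing-device
 * writes in issue order.
 */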
struct dirty_io {
        struct closure cl;
        struct cached_dev *dc;
        uint16_t sequence;
        struct bio bio;
};

static void dirty_init(struct keybuf_key *w)
{
        struct dirty_io *io = w->private;
        struct bio *bio = &io->bio;

        bio_init(bio, bio->bi_inline_vecs,
                 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
        if (!io->dc->writeback_percent)
                bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

        bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
        bio->bi_private = w;
        bch_bio_map(bio, NULL);
}

static void dirty_io_destructor(struct closure *cl)
{
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);

        kfree(io);
}

static void write_dirty_finish(struct closure *cl)
{
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
        struct cached_dev *dc = io->dc;

        bio_free_pages(&io->bio);

        /* This is kind of a dumb way of signalling errors. */
        if (KEY_DIRTY(&w->key)) {
                int ret;
                unsigned int i;
                struct keylist keys;

                bch_keylist_init(&keys);

                bkey_copy(keys.top, &w->key);
                SET_KEY_DIRTY(keys.top, false);
                bch_keylist_push(&keys);

                for (i = 0; i < KEY_PTRS(&w->key); i++)
                        atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);

                ret = bch_btree_insert(dc->disk.c, &keys, NULL, &w->key);

                if (ret)
                        trace_bcache_writeback_collision(&w->key);

                atomic_long_inc(ret
                                ? &dc->disk.c->writeback_keys_failed
                                : &dc->disk.c->writeback_keys_done);
        }

        bch_keybuf_del(&dc->writeback_keys, w);
        up(&dc->in_flight);

        closure_return_with_destructor(cl, dirty_io_destructor);
}

static void dirty_endio(struct bio *bio)
{
        struct keybuf_key *w = bio->bi_private;
        struct dirty_io *io = w->private;

        if (bio->bi_status) {
                SET_KEY_DIRTY(&w->key, false);
                bch_count_backing_io_errors(io->dc, bio);
        }

        closure_put(&io->cl);
}

static void write_dirty(struct closure *cl)
{
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
        struct cached_dev *dc = io->dc;

        uint16_t next_sequence;

        if (atomic_read(&dc->writeback_sequence_next) != io->sequence) {
                /* Not our turn to write; wait for a write to complete */
                closure_wait(&dc->writeback_ordering_wait, cl);

                if (atomic_read(&dc->writeback_sequence_next) == io->sequence) {
                        /*
                         * Edge case -- it happened in an indeterminate order
                         * relative to when we were added to the wait list.
                         */
                        closure_wake_up(&dc->writeback_ordering_wait);
                }

                continue_at(cl, write_dirty, io->dc->writeback_write_wq);
                return;
        }

        next_sequence = io->sequence + 1;

        /*
         * IO errors are signalled using the dirty bit on the key.
         * If we failed to read, we should not attempt to write to the
         * backing device. Instead, immediately go to write_dirty_finish
         * to clean up.
         */
        if (KEY_DIRTY(&w->key)) {
                dirty_init(w);
                bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
                io->bio.bi_iter.bi_sector = KEY_START(&w->key);
                bio_set_dev(&io->bio, io->dc->bdev);
                io->bio.bi_end_io = dirty_endio;

                /* I/O request sent to backing device */
                closure_bio_submit(io->dc->disk.c, &io->bio, cl);
        }

        atomic_set(&dc->writeback_sequence_next, next_sequence);
        closure_wake_up(&dc->writeback_ordering_wait);

        continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
}

static void read_dirty_endio(struct bio *bio)
{
        struct keybuf_key *w = bio->bi_private;
        struct dirty_io *io = w->private;

        /* is_read = 1 */
        bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
                            bio->bi_status, 1,
                            "reading dirty data from cache");

        dirty_endio(bio);
}

static void read_dirty_submit(struct closure *cl)
{
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);

        closure_bio_submit(io->dc->disk.c, &io->bio, cl);

        continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}

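/*
 * Main writeback pass: pull dirty keys out of dc->writeback_keys, batching
 * up to MAX_WRITEBACKS_IN_PASS contiguous keys (or MAX_WRITESIZE_IN_PASS
 * sectors) at a time, read the data from the cache and hand each key off to
 * write_dirty(), sleeping between batches according to writeback_delay().
 */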
static void read_dirty(struct cached_dev *dc)
{
        unsigned int delay = 0;
        struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
        size_t size;
        int nk, i;
        struct dirty_io *io;
        struct closure cl;
        uint16_t sequence = 0;

        BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list));
        atomic_set(&dc->writeback_sequence_next, sequence);
        closure_init_stack(&cl);

        /*
         * XXX: if we error, background writeback just spins. Should use some
         * mempools.
         */

        next = bch_keybuf_next(&dc->writeback_keys);

        while (!kthread_should_stop() &&
               !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
               next) {
                size = 0;
                nk = 0;

                do {
                        BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));

                        /*
                         * Don't combine too many operations, even if they
                         * are all small.
                         */
                        if (nk >= MAX_WRITEBACKS_IN_PASS)
                                break;

                        /*
                         * If the current operation is very large, don't
                         * further combine operations.
                         */
                        if (size >= MAX_WRITESIZE_IN_PASS)
                                break;

                        /*
                         * Operations are only eligible to be combined
                         * if they are contiguous.
                         *
                         * TODO: add a heuristic willing to fire a
                         * certain amount of non-contiguous IO per pass,
                         * so that we can benefit from backing device
                         * command queueing.
                         */
                        if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
                                                  &START_KEY(&next->key)))
                                break;

                        size += KEY_SIZE(&next->key);
                        keys[nk++] = next;
                } while ((next = bch_keybuf_next(&dc->writeback_keys)));

                /* Now we have gathered a set of 1..5 keys to write back. */
                for (i = 0; i < nk; i++) {
                        w = keys[i];

                        io = kzalloc(struct_size(io, bio.bi_inline_vecs,
                                        DIV_ROUND_UP(KEY_SIZE(&w->key),
                                                     PAGE_SECTORS)),
                                     GFP_KERNEL);
                        if (!io)
                                goto err;

                        w->private = io;
                        io->dc = dc;
                        io->sequence = sequence++;

                        dirty_init(w);
                        bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
                        io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
                        bio_set_dev(&io->bio,
                                    PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
                        io->bio.bi_end_io = read_dirty_endio;

                        if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
                                goto err_free;

                        trace_bcache_writeback(&w->key);

                        down(&dc->in_flight);

                        /*
                         * We've acquired a semaphore slot for the maximum
                         * number of simultaneous writebacks; from here
                         * everything happens asynchronously.
                         */
                        closure_call(&io->cl, read_dirty_submit, NULL, &cl);
                }

                delay = writeback_delay(dc, size);

                while (!kthread_should_stop() &&
                       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
                       delay) {
                        schedule_timeout_interruptible(delay);
                        delay = writeback_delay(dc, 0);
                }
        }

        if (0) {
err_free:
                kfree(w->private);
err:
                bch_keybuf_del(&dc->writeback_keys, w);
        }

        /*
         * Wait for outstanding writeback IOs to finish (and keybuf slots to be
         * freed) before refilling again
         */
        closure_sync(&cl);
}

/* Scan for dirty data */

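/*
 * Account @nr_sectors (which may be negative, when sectors become clean)
 * against the per-stripe dirty counters of device @inode, keeping the
 * full_dirty_stripes bitmap in sync so fully dirty stripes can be written
 * back preferentially.
 */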
void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
                                  uint64_t offset, int nr_sectors)
{
        struct bcache_device *d = c->devices[inode];
        unsigned int stripe_offset, sectors_dirty;
        int stripe;

        if (!d)
                return;

        stripe = offset_to_stripe(d, offset);
        if (stripe < 0)
                return;

        if (UUID_FLASH_ONLY(&c->uuids[inode]))
                atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);

        stripe_offset = offset & (d->stripe_size - 1);

        while (nr_sectors) {
                int s = min_t(unsigned int, abs(nr_sectors),
                              d->stripe_size - stripe_offset);

                if (nr_sectors < 0)
                        s = -s;

                if (stripe >= d->nr_stripes)
                        return;

                sectors_dirty = atomic_add_return(s,
                                        d->stripe_sectors_dirty + stripe);
                if (sectors_dirty == d->stripe_size)
                        set_bit(stripe, d->full_dirty_stripes);
                else
                        clear_bit(stripe, d->full_dirty_stripes);

                nr_sectors -= s;
                stripe_offset = 0;
                stripe++;
        }
}

static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
        struct cached_dev *dc = container_of(buf,
                                             struct cached_dev,
                                             writeback_keys);

        BUG_ON(KEY_INODE(k) != dc->disk.id);

        return KEY_DIRTY(k);
}

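/*
 * When partial stripe writes are expensive (e.g. on RAID5/6 backing
 * devices), prefer to write back stripes that are completely dirty: walk
 * the full_dirty_stripes bitmap from the last scanned position, wrapping
 * around once, and refill the writeback keybuf from those ranges.
 */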
static void refill_full_stripes(struct cached_dev *dc)
{
        struct keybuf *buf = &dc->writeback_keys;
        unsigned int start_stripe, next_stripe;
        int stripe;
        bool wrapped = false;

        stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
        if (stripe < 0)
                stripe = 0;

        start_stripe = stripe;

        while (1) {
                stripe = find_next_bit(dc->disk.full_dirty_stripes,
                                       dc->disk.nr_stripes, stripe);

                if (stripe == dc->disk.nr_stripes)
                        goto next;

                next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
                                                 dc->disk.nr_stripes, stripe);

                buf->last_scanned = KEY(dc->disk.id,
                                        stripe * dc->disk.stripe_size, 0);

                bch_refill_keybuf(dc->disk.c, buf,
                                  &KEY(dc->disk.id,
                                       next_stripe * dc->disk.stripe_size, 0),
                                  dirty_pred);

                if (array_freelist_empty(&buf->freelist))
                        return;

                stripe = next_stripe;
next:
                if (wrapped && stripe > start_stripe)
                        return;

                if (stripe == dc->disk.nr_stripes) {
                        stripe = 0;
                        wrapped = true;
                }
        }
}

/*
 * Returns true if we scanned the entire disk
 */
static bool refill_dirty(struct cached_dev *dc)
{
        struct keybuf *buf = &dc->writeback_keys;
        struct bkey start = KEY(dc->disk.id, 0, 0);
        struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
        struct bkey start_pos;

        /*
         * make sure keybuf pos is inside the range for this disk - at bringup
         * we might not be attached yet so this disk's inode nr isn't
         * initialized then
         */
        if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
            bkey_cmp(&buf->last_scanned, &end) > 0)
                buf->last_scanned = start;

        if (dc->partial_stripes_expensive) {
                refill_full_stripes(dc);
                if (array_freelist_empty(&buf->freelist))
                        return false;
        }

        start_pos = buf->last_scanned;
        bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);

        if (bkey_cmp(&buf->last_scanned, &end) < 0)
                return false;

        /*
         * If we get to the end start scanning again from the beginning, and
         * only scan up to where we initially started scanning from:
         */
        buf->last_scanned = start;
        bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);

        return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}

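/*
 * Per-device writeback kthread: sleeps until there is dirty data and
 * writeback is enabled, then repeatedly refills the keybuf and writes the
 * dirty keys back. Once a full index scan finds nothing dirty it marks the
 * backing device clean in its superblock, and it exits when the device is
 * detached, the kthread is stopped, or the cache set's I/O is disabled.
 */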
static int bch_writeback_thread(void *arg)
{
        struct cached_dev *dc = arg;
        struct cache_set *c = dc->disk.c;
        bool searched_full_index;

        bch_ratelimit_reset(&dc->writeback_rate);

        while (!kthread_should_stop() &&
               !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                down_write(&dc->writeback_lock);
                set_current_state(TASK_INTERRUPTIBLE);
                /*
                 * If the bcache device is detaching, skip here and continue
                 * to perform writeback. Otherwise, if there is no dirty data
                 * on the cache, or there is dirty data but writeback is
                 * disabled, the writeback thread should sleep here and wait
                 * for someone else to wake it up.
                 */
                if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
                    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
                        up_write(&dc->writeback_lock);

                        if (kthread_should_stop() ||
                            test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                                set_current_state(TASK_RUNNING);
                                break;
                        }

                        schedule();
                        continue;
                }
                set_current_state(TASK_RUNNING);

                searched_full_index = refill_dirty(dc);

                if (searched_full_index &&
                    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
                        atomic_set(&dc->has_dirty, 0);
                        SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
                        bch_write_bdev_super(dc, NULL);
                        /*
                         * If bcache device is detaching via sysfs interface,
                         * writeback thread should stop after there is no dirty
                         * data on cache. BCACHE_DEV_DETACHING flag is set in
                         * bch_cached_dev_detach().
                         */
                        if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
                                up_write(&dc->writeback_lock);
                                break;
                        }

                        /*
                         * When the dirty data ratio is high (e.g. 50%+), there
                         * might be heavy bucket fragmentation after writeback
                         * finishes, which hurts subsequent write performance.
                         * If users really care about write performance they
                         * may set BCH_ENABLE_AUTO_GC via sysfs; then, when
                         * BCH_DO_AUTO_GC is set, the garbage collection thread
                         * will be woken up here. After the moving gc, the
                         * shrunk btree and the discarded free bucket space on
                         * the SSD may help subsequent write requests.
                         */
                        if (c->gc_after_writeback ==
                            (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
                                c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
                                force_wake_up_gc(c);
                        }
                }

                up_write(&dc->writeback_lock);

                read_dirty(dc);

                if (searched_full_index) {
                        unsigned int delay = dc->writeback_delay * HZ;

                        while (delay &&
                               !kthread_should_stop() &&
                               !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
                               !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
                                delay = schedule_timeout_interruptible(delay);

                        bch_ratelimit_reset(&dc->writeback_rate);
                }
        }

        if (dc->writeback_write_wq) {
                flush_workqueue(dc->writeback_write_wq);
                destroy_workqueue(dc->writeback_write_wq);
        }
        cached_dev_put(dc);
        wait_for_kthread_stop();

        return 0;
}

/* Init */
#define INIT_KEYS_EACH_TIME     500000
#define INIT_KEYS_SLEEP_MS      100
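/*
 * Dirty-sector initialization walks every key in the btree; to avoid
 * starving foreground requests it yields for INIT_KEYS_SLEEP_MS after
 * every INIT_KEYS_EACH_TIME keys whenever searches are in flight.
 */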
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) struct sectors_dirty_init {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) struct btree_op op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) unsigned int inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) size_t count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) struct bkey start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) struct bkey *k)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) struct sectors_dirty_init *op = container_of(_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) struct sectors_dirty_init, op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (KEY_INODE(k) > op->inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return MAP_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) if (KEY_DIRTY(k))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) KEY_START(k), KEY_SIZE(k));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) op->count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) if (atomic_read(&b->c->search_inflight) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) !(op->count % INIT_KEYS_EACH_TIME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) bkey_copy_key(&op->start, k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) return MAP_CONTINUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789)
static int bch_root_node_dirty_init(struct cache_set *c,
				    struct bcache_device *d,
				    struct bkey *k)
{
	struct sectors_dirty_init op;
	int ret;

	bch_btree_op_init(&op.op, -1);
	op.inode = d->id;
	op.count = 0;
	op.start = KEY(op.inode, 0, 0);

	do {
		ret = bcache_btree(map_keys_recurse,
				   k,
				   c->root,
				   &op.op,
				   &op.start,
				   sectors_dirty_init_fn,
				   0);
		if (ret == -EAGAIN)
			schedule_timeout_interruptible(
				msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
		else if (ret < 0) {
			pr_warn("sectors dirty init failed, ret=%d!\n", ret);
			break;
		}
	} while (ret == -EAGAIN);

	return ret;
}

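/*
 * Worker thread for the parallel dirty-sector scan: each thread claims the
 * next root-node key index via state->key_idx under state->idx_lock, skips
 * forward to that key, and counts the subtree below it with
 * bch_root_node_dirty_init(). The last thread to finish wakes the waiter
 * in bch_sectors_dirty_init().
 */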
static int bch_dirty_init_thread(void *arg)
{
	struct dirty_init_thrd_info *info = arg;
	struct bch_dirty_init_state *state = info->state;
	struct cache_set *c = state->c;
	struct btree_iter iter;
	struct bkey *k, *p;
	int cur_idx, prev_idx, skip_nr;

	k = p = NULL;
	cur_idx = prev_idx = 0;

	bch_btree_iter_init(&c->root->keys, &iter, NULL);
	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
	BUG_ON(!k);

	p = k;

	while (k) {
		spin_lock(&state->idx_lock);
		cur_idx = state->key_idx;
		state->key_idx++;
		spin_unlock(&state->idx_lock);

		skip_nr = cur_idx - prev_idx;

		while (skip_nr) {
			k = bch_btree_iter_next_filter(&iter,
						       &c->root->keys,
						       bch_ptr_bad);
			if (k)
				p = k;
			else {
				atomic_set(&state->enough, 1);
				/* Make state->enough visible before exiting */
				smp_mb__after_atomic();
				goto out;
			}
			skip_nr--;
			cond_resched();
		}

		if (p) {
			if (bch_root_node_dirty_init(c, state->d, p) < 0)
				goto out;
		}

		p = NULL;
		prev_idx = cur_idx;
		cond_resched();
	}

out:
	/*
	 * Order the writes above before the decrement so the waiter on
	 * state->wait is woken in time.
	 */
	smp_mb__before_atomic();
	if (atomic_dec_and_test(&state->started))
		wake_up(&state->wait);

	return 0;
}

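/*
 * Use half of the online CPUs for the dirty-sector scan, but always at
 * least one thread and no more than BCH_DIRTY_INIT_THRD_MAX.
 */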
static int bch_btree_dirty_init_thread_nr(void)
{
	int n = num_online_cpus() / 2;

	if (n == 0)
		n = 1;
	else if (n > BCH_DIRTY_INIT_THRD_MAX)
		n = BCH_DIRTY_INIT_THRD_MAX;

	return n;
}

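/*
 * Populate the dirty-sector counters for device d at registration time. If
 * the btree root is a leaf, count its keys directly; otherwise spread the
 * scan of the root's subtrees across several kernel threads and wait for
 * all of them to finish.
 */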
void bch_sectors_dirty_init(struct bcache_device *d)
{
	int i;
	struct bkey *k = NULL;
	struct btree_iter iter;
	struct sectors_dirty_init op;
	struct cache_set *c = d->c;
	struct bch_dirty_init_state *state;
	char name[32];

	/* If the root is a leaf node, just count the keys in the root */
	if (c->root->level == 0) {
		bch_btree_op_init(&op.op, -1);
		op.inode = d->id;
		op.count = 0;
		op.start = KEY(op.inode, 0, 0);

		for_each_key_filter(&c->root->keys,
				    k, &iter, bch_ptr_invalid)
			sectors_dirty_init_fn(&op.op, c->root, k);
		return;
	}

	state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
	if (!state) {
		pr_warn("sectors dirty init failed: cannot allocate memory\n");
		return;
	}

	state->c = c;
	state->d = d;
	state->total_threads = bch_btree_dirty_init_thread_nr();
	state->key_idx = 0;
	spin_lock_init(&state->idx_lock);
	atomic_set(&state->started, 0);
	atomic_set(&state->enough, 0);
	init_waitqueue_head(&state->wait);

	for (i = 0; i < state->total_threads; i++) {
		/* Fetch the latest value of state->enough before checking it */
		smp_mb__before_atomic();
		if (atomic_read(&state->enough))
			break;

		state->infos[i].state = state;
		atomic_inc(&state->started);
		snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);

		state->infos[i].thread =
			kthread_run(bch_dirty_init_thread,
				    &state->infos[i],
				    name);
		if (IS_ERR(state->infos[i].thread)) {
			pr_err("failed to run thread bch_dirty_init[%d]\n", i);
			for (--i; i >= 0; i--)
				kthread_stop(state->infos[i].thread);
			goto out;
		}
	}

	/*
	 * Must wait for all threads to stop before freeing state below.
	 */
	wait_event_interruptible(state->wait,
				 atomic_read(&state->started) == 0);

out:
	kfree(state);
}

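/*
 * Set the default writeback parameters for a cached device: a 10%
 * writeback_percent target, writeback initially not running, the starting
 * and minimum writeback rate, and the proportional/integral term inverses
 * used by the writeback rate controller.
 */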
void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
	sema_init(&dc->in_flight, 64);
	init_rwsem(&dc->writeback_lock);
	bch_keybuf_init(&dc->writeback_keys);

	dc->writeback_metadata = true;
	dc->writeback_running = false;
	dc->writeback_percent = 10;
	dc->writeback_delay = 30;
	atomic_long_set(&dc->writeback_rate.rate, 1024);
	dc->writeback_rate_minimum = 8;

	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
	dc->writeback_rate_p_term_inverse = 40;
	dc->writeback_rate_i_term_inverse = 10000;

	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}

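/*
 * Start background writeback for a cached device: create the writeback
 * write workqueue and the writeback kthread, mark writeback as running,
 * schedule the periodic rate update, and queue the first writeback pass.
 */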
int bch_cached_dev_writeback_start(struct cached_dev *dc)
{
	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
						 WQ_MEM_RECLAIM, 0);
	if (!dc->writeback_write_wq)
		return -ENOMEM;

	cached_dev_get(dc);
	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
					      "bcache_writeback");
	if (IS_ERR(dc->writeback_thread)) {
		cached_dev_put(dc);
		destroy_workqueue(dc->writeback_write_wq);
		return PTR_ERR(dc->writeback_thread);
	}
	dc->writeback_running = true;

	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);

	bch_writeback_queue(dc);

	return 0;
}