// SPDX-License-Identifier: GPL-2.0
/*
 * buffered writeback throttling. loosely based on CoDel. We can't drop
 * packets for IO scheduling, so the logic is something like this:
 *
 * - Monitor latencies in a defined window of time.
 * - If the minimum latency in the above window exceeds some target, increment
 *   scaling step and scale down queue depth by a factor of 2x. The monitoring
 *   window is then shrunk to 100 / sqrt(scaling step + 1).
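 *   (e.g. scaling step 1 gives a ~71msec window, step 3 gives a 50msec one).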
 * - For any window where we don't have solid data on what the latencies
 *   look like, retain status quo.
 * - If latencies look good, decrement scaling step.
 * - If we're only doing writes, allow the scaling step to go negative. This
 *   will temporarily boost write performance, snapping back to a stable
 *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
 *   positive scaling steps where we shrink the monitoring window, a negative
 *   scaling step retains the default step==0 window size.
 *
 * Copyright (C) 2016 Jens Axboe
 *
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>

#include "blk-wbt.h"
#include "blk-rq-qos.h"

#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>

static inline void wbt_clear_state(struct request *rq)
{
	rq->wbt_flags = 0;
}

static inline enum wbt_flags wbt_flags(struct request *rq)
{
	return rq->wbt_flags;
}

static inline bool wbt_is_tracked(struct request *rq)
{
	return rq->wbt_flags & WBT_TRACKED;
}

static inline bool wbt_is_read(struct request *rq)
{
	return rq->wbt_flags & WBT_READ;
}

enum {
	/*
	 * Default setting; we'll scale up (to 75% of QD max) or down (min 1)
	 * from here, depending on device stats.
	 */
	RWB_DEF_DEPTH	= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats if we don't meet this minimum number of write
	 * samples.
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows without enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};

static inline bool rwb_enabled(struct rq_wb *rwb)
{
	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
		      rwb->wb_normal != 0;
}

static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
{
	if (rwb_enabled(rwb)) {
		const unsigned long cur = jiffies;

		if (cur != *var)
			*var = cur;
	}
}

/*
 * If a task was rate throttled in balance_dirty_pages() within the last
 * second or so, use that to indicate a higher cleaning rate.
 */
static bool wb_recent_wait(struct rq_wb *rwb)
{
	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;

	return time_before(jiffies, wb->dirty_sleep + HZ);
}

static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	if (wb_acct & WBT_KSWAPD)
		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
	else if (wb_acct & WBT_DISCARD)
		return &rwb->rq_wait[WBT_RWQ_DISCARD];

	return &rwb->rq_wait[WBT_RWQ_BG];
}

static void rwb_wake_all(struct rq_wb *rwb)
{
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++) {
		struct rq_wait *rqw = &rwb->rq_wait[i];

		if (wq_has_sleeper(&rqw->wait))
			wake_up_all(&rqw->wait);
	}
}

static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * wbt got disabled with IO in flight. Wake up any potential
	 * waiters; we don't have to do more than that.
	 */
	if (unlikely(!rwb_enabled(rwb))) {
		rwb_wake_all(rwb);
		return;
	}

	/*
	 * For discards, our limit is always the background. For writes, if
	 * the device does write back caching, drop further down before we
	 * wake people up.
	 */
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

	/*
	 * Don't wake anyone up if we are above the limit.
	 */
	if (inflight && inflight >= limit)
		return;

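	/*
	 * Wake everybody once the queue has fully drained, or once we have
	 * freed up at least half the background depth worth of slots; woken
	 * waiters re-check the limit themselves before proceeding.
	 */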
	if (wq_has_sleeper(&rqw->wait)) {
		int diff = limit - inflight;

		if (!inflight || diff >= rwb->wb_background / 2)
			wake_up_all(&rqw->wait);
	}
}

static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct rq_wait *rqw;

	if (!(wb_acct & WBT_TRACKED))
		return;

	rqw = get_rq_wait(rwb, wb_acct);
	wbt_rqw_done(rwb, rqw, wb_acct);
}

/*
 * Called on completion of a request. Note that it's also called when
 * a request is merged, at the point where the request gets freed.
 */
static void wbt_done(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!wbt_is_tracked(rq)) {
		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	} else {
		WARN_ON_ONCE(rq == rwb->sync_cookie);
		__wbt_done(rqos, wbt_flags(rq));
	}
	wbt_clear_state(rq);
}

static inline bool stat_sample_valid(struct blk_rq_stat *stat)
{
	/*
	 * We need at least one read sample, and a minimum of
	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
	 * that it's writes impacting us, and not just some sole read on
	 * a device that is in a lower power state.
	 */
	return (stat[READ].nr_samples >= 1 &&
		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
}

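/*
 * How long has the currently tracked sync request been outstanding?
 * Returns 0 if nothing is being tracked.
 */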
static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
	u64 now, issue = READ_ONCE(rwb->sync_issue);

	if (!issue || !rwb->sync_cookie)
		return 0;

	now = ktime_to_ns(ktime_get());
	return now - issue;
}

enum {
	LAT_OK = 1,
	LAT_UNKNOWN,
	LAT_UNKNOWN_WRITES,
	LAT_EXCEEDED,
};

static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;
	u64 thislat;

	/*
	 * If our stored sync issue exceeds the window size, or it
	 * exceeds our min target AND we haven't logged any entries,
	 * flag the latency as exceeded. wbt works off completion latencies,
	 * but for a flooded device, a single sync IO can take a long time
	 * to complete after being issued. If this time exceeds our
	 * monitoring window AND we didn't see any other completions in that
	 * window, then count that sync IO as a violation of the latency.
	 */
	thislat = rwb_sync_issue_lat(rwb);
	if (thislat > rwb->cur_win_nsec ||
	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
		trace_wbt_lat(bdi, thislat);
		return LAT_EXCEEDED;
	}

	/*
	 * No read/write mix, if stat isn't valid
	 */
	if (!stat_sample_valid(stat)) {
		/*
		 * If we had writes in this stat window and the window is
		 * current, we're only doing writes. If a task recently
		 * waited or still has writes in flight, consider us doing
		 * just writes as well.
		 */
		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
		    wbt_inflight(rwb))
			return LAT_UNKNOWN_WRITES;
		return LAT_UNKNOWN;
	}

	/*
	 * If the 'min' latency exceeds our target, step down.
	 */
	if (stat[READ].min > rwb->min_lat_nsec) {
		trace_wbt_lat(bdi, stat[READ].min);
		trace_wbt_stat(bdi, stat);
		return LAT_EXCEEDED;
	}

	if (rqd->scale_step)
		trace_wbt_stat(bdi, stat);

	return LAT_OK;
}

static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;

	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
}

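/*
 * Derive the per-class inflight limits from the current max depth: normal
 * writeback gets roughly half of it and background writeback roughly a
 * quarter, with floors for very shallow depths. A zero latency target
 * disables throttling entirely (wb_normal == 0 turns wbt off).
 */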
static void calc_wb_limits(struct rq_wb *rwb)
{
	if (rwb->min_lat_nsec == 0) {
		rwb->wb_normal = rwb->wb_background = 0;
	} else if (rwb->rq_depth.max_depth <= 2) {
		rwb->wb_normal = rwb->rq_depth.max_depth;
		rwb->wb_background = 1;
	} else {
		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
	}
}

static void scale_up(struct rq_wb *rwb)
{
	if (!rq_depth_scale_up(&rwb->rq_depth))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_wake_all(rwb);
	rwb_trace_step(rwb, tracepoint_string("scale up"));
}

static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_trace_step(rwb, tracepoint_string("scale down"));
}

static void rwb_arm_timer(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	if (rqd->scale_step > 0) {
		/*
		 * We should speed this up, using some variant of a fast
		 * integer inverse square root calculation. Since we only do
		 * this for every window expiration, it's not a huge deal,
		 * though.
		 */
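		/*
		 * (win_nsec << 4) / int_sqrt((step + 1) << 8) is just a
		 * fixed-point rendering of win_nsec / sqrt(step + 1).
		 */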
		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
					int_sqrt((rqd->scale_step + 1) << 8));
	} else {
		/*
		 * For step 0 or below, we don't want to shrink or grow the
		 * window; retain the default size.
		 */
		rwb->cur_win_nsec = rwb->win_nsec;
	}

	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
}

static void wb_timer_fn(struct blk_stat_callback *cb)
{
	struct rq_wb *rwb = cb->data;
	struct rq_depth *rqd = &rwb->rq_depth;
	unsigned int inflight = wbt_inflight(rwb);
	int status;

	status = latency_exceeded(rwb, cb->stat);

	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
			inflight);

	/*
	 * If we exceeded the latency target, step down. If we did not,
	 * step one level up. If we don't know enough to say either exceeded
	 * or ok, then don't do anything.
	 */
	switch (status) {
	case LAT_EXCEEDED:
		scale_down(rwb, true);
		break;
	case LAT_OK:
		scale_up(rwb);
		break;
	case LAT_UNKNOWN_WRITES:
		/*
		 * We started at the center step, and don't have a valid
		 * read/write sample, but we do have writes going on.
		 * Allow the step to go negative, to increase write perf.
		 */
		scale_up(rwb);
		break;
	case LAT_UNKNOWN:
		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
			break;
		/*
		 * We get here when we previously scaled the depth, and we
		 * currently don't have a valid read/write sample. For that
		 * case, slowly return to center state (step == 0).
		 */
		if (rqd->scale_step > 0)
			scale_up(rwb);
		else if (rqd->scale_step < 0)
			scale_down(rwb, false);
		break;
	default:
		break;
	}

	/*
	 * Re-arm timer, if we have IO in flight or are still scaled away
	 * from the default depth.
	 */
	if (rqd->scale_step || inflight)
		rwb_arm_timer(rwb);
}

static void wbt_update_limits(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	rqd->scale_step = 0;
	rqd->scaled_max = false;

	rq_depth_calc_max_depth(rqd);
	calc_wb_limits(rwb);

	rwb_wake_all(rwb);
}

u64 wbt_get_min_lat(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return 0;
	return RQWB(rqos)->min_lat_nsec;
}

void wbt_set_min_lat(struct request_queue *q, u64 val)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return;
	RQWB(rqos)->min_lat_nsec = val;
	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
	wbt_update_limits(RQWB(rqos));
}

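/*
 * "Close" IO: we issued or completed unrelated IO within the last ~100msec.
 */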
static bool close_io(struct rq_wb *rwb)
{
	const unsigned long now = jiffies;

	return time_before(now, rwb->last_issue + HZ / 10) ||
	       time_before(now, rwb->last_comp + HZ / 10);
}

#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)

static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
	unsigned int limit;

	/*
	 * If we got disabled, just return UINT_MAX. This ensures that
	 * we'll properly inc a new IO, and dec+wakeup at the end.
	 */
	if (!rwb_enabled(rwb))
		return UINT_MAX;

	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
		return rwb->wb_background;

	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then
	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
	 * that. If the write is marked as a background write, then use
	 * the idle limit, or go to normal if we haven't had competing
	 * IO for a bit.
	 */
	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
		limit = rwb->rq_depth.max_depth;
	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
		/*
		 * If less than 100ms since we completed unrelated IO,
		 * limit us to half the depth for background writeback.
		 */
		limit = rwb->wb_background;
	} else
		limit = rwb->wb_normal;

	return limit;
}

struct wbt_wait_data {
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;
	unsigned long rw;
};

static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
}

static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}

/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       unsigned long rw)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.rw = rw,
	};

	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}

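/*
 * Decide whether this bio is subject to throttling: buffered and background
 * writes and discards are; O_DIRECT style writes (REQ_SYNC | REQ_IDLE both
 * set) and everything else are not.
 */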
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE:
		/*
		 * Don't throttle WRITE_ODIRECT
		 */
		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
		    (REQ_SYNC | REQ_IDLE))
			return false;
		fallthrough;
	case REQ_OP_DISCARD:
		return true;
	default:
		return false;
	}
}

static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
{
	enum wbt_flags flags = 0;

	if (!rwb_enabled(rwb))
		return 0;

	if (bio_op(bio) == REQ_OP_READ) {
		flags = WBT_READ;
	} else if (wbt_should_throttle(rwb, bio)) {
		if (current_is_kswapd())
			flags |= WBT_KSWAPD;
		if (bio_op(bio) == REQ_OP_DISCARD)
			flags |= WBT_DISCARD;
		flags |= WBT_TRACKED;
	}
	return flags;
}

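/*
 * Release the budget taken in wbt_wait() for a bio that won't be turned
 * into its own request after all (e.g. it got merged, or the request
 * allocation failed).
 */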
static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags = bio_to_wbt_flags(rwb, bio);
	__wbt_done(rqos, flags);
}

/*
 * May sleep, if we have exceeded the writeback limits. Caller can pass
 * in an irq held spinlock, if it holds one when calling this function.
 * If we do sleep, we'll release and re-grab it.
 */
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags;

	flags = bio_to_wbt_flags(rwb, bio);
	if (!(flags & WBT_TRACKED)) {
		if (flags & WBT_READ)
			wb_timestamp(rwb, &rwb->last_issue);
		return;
	}

	__wbt_wait(rwb, flags, bio->bi_opf);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);
}

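/*
 * Copy the wbt flags computed for the bio onto the request, so that
 * completion (wbt_done()) knows how the IO was accounted.
 */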
static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
}

static void wbt_issue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!rwb_enabled(rwb))
		return;

	/*
	 * Track sync issue, in case it takes a long time to complete. This
	 * lets us react more quickly when a sync IO is slow to finish. Note
	 * that this is just a hint. The request can go away when it
	 * completes, so it's important we never dereference it. We only use
	 * the address to compare with, which is why we store the sync_issue
	 * time locally.
	 */
	if (wbt_is_read(rq) && !rwb->sync_issue) {
		rwb->sync_cookie = rq;
		rwb->sync_issue = rq->io_start_time_ns;
	}
}

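/*
 * The tracked sync request is being requeued; forget it, so that its stale
 * issue time isn't mistaken for an excessive sync latency.
 */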
static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);
	if (!rwb_enabled(rwb))
		return;
	if (rq == rwb->sync_cookie) {
		rwb->sync_issue = 0;
		rwb->sync_cookie = NULL;
	}
}

void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (rqos)
		RQWB(rqos)->wc = write_cache_on;
}

/*
 * Enable wbt if defaults are configured that way
 */
void wbt_enable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	/* Throttling already enabled? */
	if (rqos) {
		if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
		return;
	}

	/* Queue not registered? Maybe shutting down... */
	if (!blk_queue_registered(q))
		return;

	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
		wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);

u64 wbt_default_latency_nsec(struct request_queue *q)
{
	/*
	 * We default to 2msec for non-rotational storage, and 75msec
	 * for rotational storage.
	 */
	if (blk_queue_nonrot(q))
		return 2000000ULL;
	else
		return 75000000ULL;
}

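/*
 * Bucket function for the blk-stat callback: bucket 0 holds reads, bucket 1
 * holds writes; anything else (-1) is not accounted.
 */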
static int wbt_data_dir(const struct request *rq)
{
	const int op = req_op(rq);

	if (op == REQ_OP_READ)
		return READ;
	else if (op_is_write(op))
		return WRITE;

	/* don't account */
	return -1;
}

static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
	wbt_update_limits(RQWB(rqos));
}

static void wbt_exit(struct rq_qos *rqos)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct request_queue *q = rqos->q;

	blk_stat_remove_callback(q, rwb->cb);
	blk_stat_free_callback(rwb->cb);
	kfree(rwb);
}

/*
 * Disable wbt, if enabled by default.
 */
void wbt_disable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	struct rq_wb *rwb;
	if (!rqos)
		return;
	rwb = RQWB(rqos);
	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
		blk_stat_deactivate(rwb->cb);
		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
	}
}
EXPORT_SYMBOL_GPL(wbt_disable_default);

#ifdef CONFIG_BLK_DEBUG_FS
static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
	return 0;
}

static int wbt_enabled_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%d\n", rwb->enable_state);
	return 0;
}

static int wbt_id_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;

	seq_printf(m, "%u\n", rqos->id);
	return 0;
}

static int wbt_inflight_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++)
		seq_printf(m, "%d: inflight %d\n", i,
			   atomic_read(&rwb->rq_wait[i].inflight));
	return 0;
}

static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%lu\n", rwb->min_lat_nsec);
	return 0;
}

static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->unknown_cnt);
	return 0;
}

static int wbt_normal_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_normal);
	return 0;
}

static int wbt_background_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_background);
	return 0;
}

static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
	{"enabled", 0400, wbt_enabled_show},
	{"id", 0400, wbt_id_show},
	{"inflight", 0400, wbt_inflight_show},
	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
	{"wb_normal", 0400, wbt_normal_show},
	{"wb_background", 0400, wbt_background_show},
	{},
};
#endif

static struct rq_qos_ops wbt_rqos_ops = {
	.throttle = wbt_wait,
	.issue = wbt_issue,
	.track = wbt_track,
	.requeue = wbt_requeue,
	.done = wbt_done,
	.cleanup = wbt_cleanup,
	.queue_depth_changed = wbt_queue_depth_changed,
	.exit = wbt_exit,
#ifdef CONFIG_BLK_DEBUG_FS
	.debugfs_attrs = wbt_debugfs_attrs,
#endif
};

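/*
 * Set up wbt for a queue: allocate the rq_wb state and the stats callback,
 * register as an rq_qos policy, and arm the default depth/latency limits.
 */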
int wbt_init(struct request_queue *q)
{
	struct rq_wb *rwb;
	int i;

	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
	if (!rwb)
		return -ENOMEM;

	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
	if (!rwb->cb) {
		kfree(rwb);
		return -ENOMEM;
	}

	for (i = 0; i < WBT_NUM_RWQ; i++)
		rq_wait_init(&rwb->rq_wait[i]);

	rwb->rqos.id = RQ_QOS_WBT;
	rwb->rqos.ops = &wbt_rqos_ops;
	rwb->rqos.q = q;
	rwb->last_comp = rwb->last_issue = jiffies;
	rwb->win_nsec = RWB_WINDOW_NSEC;
	rwb->enable_state = WBT_STATE_ON_DEFAULT;
	rwb->wc = 1;
	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
	wbt_update_limits(rwb);

	/*
	 * Assign rwb and add the stats callback.
	 */
	rq_qos_add(q, &rwb->rqos);
	blk_stat_add_callback(q, rwb->cb);

	rwb->min_lat_nsec = wbt_default_latency_nsec(q);

	wbt_queue_depth_changed(&rwb->rqos);
	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));

	return 0;
}