Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for Orange Pi 5 / 5B / 5 Plus boards. The listing below is a git blame view of block/blk-wbt.c, the buffered writeback throttling (wbt) implementation.

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * buffered writeback throttling. loosely based on CoDel. We can't drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * packets for IO scheduling, so the logic is something like this:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * - Monitor latencies in a defined window of time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * - If the minimum latency in the above window exceeds some target, increment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *   scaling step and scale down queue depth by a factor of 2x. The monitoring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *   window is then shrunk to 100 / sqrt(scaling step + 1).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  * - For any window where we don't have solid data on what the latencies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *   look like, retain status quo.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  * - If latencies look good, decrement scaling step.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  * - If we're only doing writes, allow the scaling step to go negative. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *   will temporarily boost write performance, snapping back to a stable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  *   positive scaling steps where we shrink the monitoring window, a negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  *   scaling step retains the default step==0 window size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19)  * Copyright (C) 2016 Jens Axboe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  */
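
/*
 * Worked example of the window scaling described above (illustrative
 * arithmetic only): with the default 100 msec window, a positive scaling
 * step shrinks the monitoring window to roughly 100 / sqrt(step + 1):
 *
 *	step 0 -> 100 msec
 *	step 1 -> ~71 msec
 *	step 2 -> ~58 msec
 *	step 3 -> 50 msec
 *
 * while each positive step also roughly halves the allowed queue depth.
 */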
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #include <linux/blk_types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) #include "blk-wbt.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) #include "blk-rq-qos.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) #include <trace/events/wbt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) static inline void wbt_clear_state(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	rq->wbt_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) static inline enum wbt_flags wbt_flags(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 	return rq->wbt_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) static inline bool wbt_is_tracked(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	return rq->wbt_flags & WBT_TRACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) static inline bool wbt_is_read(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	return rq->wbt_flags & WBT_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	 * from here depending on device stats
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	RWB_DEF_DEPTH	= 16,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	 * 100msec window
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	 * Disregard stats, if we don't meet this minimum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	RWB_MIN_WRITE_SAMPLES	= 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 	 * If we have this number of consecutive windows with not enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	 * information to scale up or down, scale up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	RWB_UNKNOWN_BUMP	= 5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) static inline bool rwb_enabled(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 		      rwb->wb_normal != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	if (rwb_enabled(rwb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 		const unsigned long cur = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 		if (cur != *var)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 			*var = cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95)  * If a task was rate throttled in balance_dirty_pages() within the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96)  * second or so, use that to indicate a higher cleaning rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) static bool wb_recent_wait(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	return time_before(jiffies, wb->dirty_sleep + HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 					  enum wbt_flags wb_acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	if (wb_acct & WBT_KSWAPD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	else if (wb_acct & WBT_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 		return &rwb->rq_wait[WBT_RWQ_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	return &rwb->rq_wait[WBT_RWQ_BG];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) static void rwb_wake_all(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	for (i = 0; i < WBT_NUM_RWQ; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 		struct rq_wait *rqw = &rwb->rq_wait[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 		if (wq_has_sleeper(&rqw->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 			wake_up_all(&rqw->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 			 enum wbt_flags wb_acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	int inflight, limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	inflight = atomic_dec_return(&rqw->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	 * wbt got disabled with IO in flight. Wake up any potential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	 * waiters, we don't have to do more than that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	if (unlikely(!rwb_enabled(rwb))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 		rwb_wake_all(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	 * For discards, our limit is always the background. For writes, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	 * the device does write back caching, drop further down before we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	 * wake people up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	if (wb_acct & WBT_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 		limit = rwb->wb_background;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	else if (rwb->wc && !wb_recent_wait(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 		limit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 		limit = rwb->wb_normal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	 * Don't wake anyone up if we are above the normal limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	if (inflight && inflight >= limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	if (wq_has_sleeper(&rqw->wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 		int diff = limit - inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 		if (!inflight || diff >= rwb->wb_background / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 			wake_up_all(&rqw->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) }
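
/*
 * Illustrative wakeup math for wbt_rqw_done() above, assuming limits of
 * wb_normal == 8 and wb_background == 4: after the atomic decrement,
 * sleepers are only woken once inflight has dropped below the chosen
 * limit and either the queue fully drained (!inflight) or at least
 * wb_background / 2 == 2 slots opened up, so throttled writers are
 * released in small batches rather than one per completion.
 */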
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	struct rq_wait *rqw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	if (!(wb_acct & WBT_TRACKED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	rqw = get_rq_wait(rwb, wb_acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	wbt_rqw_done(rwb, rqw, wb_acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)  * Called on completion of a request. Note that it's also called when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)  * a request is merged, and when the request gets freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) static void wbt_done(struct rq_qos *rqos, struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	if (!wbt_is_tracked(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 		if (rwb->sync_cookie == rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 			rwb->sync_issue = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 			rwb->sync_cookie = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 		if (wbt_is_read(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 			wb_timestamp(rwb, &rwb->last_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 		WARN_ON_ONCE(rq == rwb->sync_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 		__wbt_done(rqos, wbt_flags(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	wbt_clear_state(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) static inline bool stat_sample_valid(struct blk_rq_stat *stat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	 * We need at least one read sample, and a minimum of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 	 * that it's writes impacting us, and not just some sole read on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	 * a device that is in a lower power state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	return (stat[READ].nr_samples >= 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	u64 now, issue = READ_ONCE(rwb->sync_issue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	if (!issue || !rwb->sync_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	now = ktime_to_ns(ktime_get());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	return now - issue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	LAT_OK = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	LAT_UNKNOWN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	LAT_UNKNOWN_WRITES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	LAT_EXCEEDED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	struct rq_depth *rqd = &rwb->rq_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	u64 thislat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	 * If our stored sync issue exceeds the window size, or it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	 * exceeds our min target AND we haven't logged any entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	 * flag the latency as exceeded. wbt works off completion latencies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	 * but for a flooded device, a single sync IO can take a long time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	 * to complete after being issued. If this time exceeds our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	 * monitoring window AND we didn't see any other completions in that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	 * window, then count that sync IO as a violation of the latency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	thislat = rwb_sync_issue_lat(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	if (thislat > rwb->cur_win_nsec ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 		trace_wbt_lat(bdi, thislat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 		return LAT_EXCEEDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	 * No read/write mix, if stat isn't valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	if (!stat_sample_valid(stat)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 		 * If we had writes in this stat window and the window is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 		 * current, we're only doing writes. If a task recently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 		 * waited or still has writes in flight, consider us doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 		 * just writes as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 		    wbt_inflight(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 			return LAT_UNKNOWN_WRITES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 		return LAT_UNKNOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 	 * If the 'min' latency exceeds our target, step down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	if (stat[READ].min > rwb->min_lat_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 		trace_wbt_lat(bdi, stat[READ].min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 		trace_wbt_stat(bdi, stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 		return LAT_EXCEEDED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	if (rqd->scale_step)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 		trace_wbt_stat(bdi, stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	return LAT_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	struct rq_depth *rqd = &rwb->rq_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) static void calc_wb_limits(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 	if (rwb->min_lat_nsec == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 		rwb->wb_normal = rwb->wb_background = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 	} else if (rwb->rq_depth.max_depth <= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 		rwb->wb_normal = rwb->rq_depth.max_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 		rwb->wb_background = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) }
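
/*
 * Example limits from calc_wb_limits() (illustrative): with
 * rq_depth.max_depth == 16, wb_normal == (16 + 1) / 2 == 8 and
 * wb_background == (16 + 3) / 4 == 4. With max_depth <= 2 the small-queue
 * case applies (wb_normal == max_depth, wb_background == 1), and a zero
 * min_lat_nsec disables throttling by zeroing both limits.
 */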
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) static void scale_up(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 	if (!rq_depth_scale_up(&rwb->rq_depth))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	calc_wb_limits(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 	rwb->unknown_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	rwb_wake_all(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	rwb_trace_step(rwb, tracepoint_string("scale up"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) static void scale_down(struct rq_wb *rwb, bool hard_throttle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	calc_wb_limits(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 	rwb->unknown_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 	rwb_trace_step(rwb, tracepoint_string("scale down"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) static void rwb_arm_timer(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	struct rq_depth *rqd = &rwb->rq_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	if (rqd->scale_step > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 		 * We should speed this up, using some variant of a fast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 		 * integer inverse square root calculation. Since we only do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 		 * this for every window expiration, it's not a huge deal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 		 * though.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 					int_sqrt((rqd->scale_step + 1) << 8));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 		 * For step < 0, we don't want to increase/decrease the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 		 * window size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 		rwb->cur_win_nsec = rwb->win_nsec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) }
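
/*
 * The fixed point math in rwb_arm_timer() is just win_nsec / sqrt(step + 1),
 * e.g. for scale_step == 3: int_sqrt((3 + 1) << 8) == int_sqrt(1024) == 32,
 * so cur_win_nsec == (win_nsec << 4) / 32 == win_nsec / 2, shrinking the
 * default 100 msec window to 50 msec (illustrative numbers).
 */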
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) static void wb_timer_fn(struct blk_stat_callback *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 	struct rq_wb *rwb = cb->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 	struct rq_depth *rqd = &rwb->rq_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 	unsigned int inflight = wbt_inflight(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 	int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	status = latency_exceeded(rwb, cb->stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 			inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 	 * If we exceeded the latency target, step down. If we did not,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	 * step one level up. If we don't know enough to say either exceeded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 	 * or ok, then don't do anything.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 	switch (status) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 	case LAT_EXCEEDED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 		scale_down(rwb, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 	case LAT_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 		scale_up(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 	case LAT_UNKNOWN_WRITES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 		 * We started at the center step, but don't have a valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 		 * read/write sample, though we do have writes going on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 		 * Allow step to go negative, to increase write perf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 		scale_up(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	case LAT_UNKNOWN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 		 * We get here when we previously scaled the depth down, and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 		 * currently don't have a valid read/write sample. For that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 		 * case, slowly return to center state (step == 0).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 		if (rqd->scale_step > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 			scale_up(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 		else if (rqd->scale_step < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 			scale_down(rwb, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 	 * Re-arm timer, if we have IO in flight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 	if (rqd->scale_step || inflight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 		rwb_arm_timer(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) }
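
/*
 * Sketch of the feedback loop in wb_timer_fn() (illustrative): a window
 * whose minimum read latency exceeds min_lat_nsec yields LAT_EXCEEDED and
 * a hard scale_down(); a clean window yields LAT_OK and a scale_up() that
 * raises the depth again; write-only windows (LAT_UNKNOWN_WRITES) also
 * scale up, letting the step go negative; and RWB_UNKNOWN_BUMP (5)
 * consecutive windows without enough data nudge the step back toward 0
 * from either side.
 */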
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) static void wbt_update_limits(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 	struct rq_depth *rqd = &rwb->rq_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 	rqd->scale_step = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 	rqd->scaled_max = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 	rq_depth_calc_max_depth(rqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 	calc_wb_limits(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 	rwb_wake_all(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) u64 wbt_get_min_lat(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	struct rq_qos *rqos = wbt_rq_qos(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 	if (!rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 	return RQWB(rqos)->min_lat_nsec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) void wbt_set_min_lat(struct request_queue *q, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 	struct rq_qos *rqos = wbt_rq_qos(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 	if (!rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 	RQWB(rqos)->min_lat_nsec = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 	wbt_update_limits(RQWB(rqos));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) static bool close_io(struct rq_wb *rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 	const unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 	return time_before(now, rwb->last_issue + HZ / 10) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 		time_before(now, rwb->last_comp + HZ / 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) #define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 	unsigned int limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 	 * If we got disabled, just return UINT_MAX. This ensures that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 	 * we'll properly inc a new IO, and dec+wakeup at the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 	if (!rwb_enabled(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 		return UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) 		return rwb->wb_background;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) 	 * At this point we know it's a buffered write. If this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 	 * kswapd trying to free memory, or REQ_SYNC is set, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) 	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) 	 * that. If the write is marked as a background write, then use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 	 * the idle limit, or go to normal if we haven't had competing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) 	 * IO for a bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) 	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 		limit = rwb->rq_depth.max_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) 		 * If less than 100ms since we completed unrelated IO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) 		 * limit us to half the depth for background writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) 		limit = rwb->wb_background;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) 		limit = rwb->wb_normal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) 	return limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) }
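
/*
 * Example limit selection in get_limit(), reusing the illustrative
 * max_depth == 16 numbers from above: a REQ_SYNC/REQ_META/REQ_PRIO write,
 * recent dirty-page throttling, or a kswapd write gets the full max_depth
 * of 16; a REQ_BACKGROUND write (or one close to other recent IO) is held
 * to wb_background == 4; any other buffered write gets wb_normal == 8;
 * discards are always capped at wb_background.
 */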
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) struct wbt_wait_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) 	struct rq_wb *rwb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 	enum wbt_flags wb_acct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) 	unsigned long rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 	struct wbt_wait_data *data = private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) 	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 	struct wbt_wait_data *data = private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)  * Block if we will exceed our limit, or if we are currently waiting for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)  * the timer to kick off queuing again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) 		       unsigned long rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) 	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) 	struct wbt_wait_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) 		.rwb = rwb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) 		.wb_acct = wb_acct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 		.rw = rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) 	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) 	switch (bio_op(bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) 	case REQ_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) 		 * Don't throttle WRITE_ODIRECT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) 		    (REQ_SYNC | REQ_IDLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) 	case REQ_OP_DISCARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) 	enum wbt_flags flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) 	if (!rwb_enabled(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) 	if (bio_op(bio) == REQ_OP_READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) 		flags = WBT_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) 	} else if (wbt_should_throttle(rwb, bio)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) 		if (current_is_kswapd())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) 			flags |= WBT_KSWAPD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) 		if (bio_op(bio) == REQ_OP_DISCARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) 			flags |= WBT_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) 		flags |= WBT_TRACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) 	return flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) }
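
/*
 * Flag mapping examples for bio_to_wbt_flags() (illustrative): a read bio
 * gets WBT_READ only (timestamped, never throttled); a buffered write
 * issued by kswapd gets WBT_KSWAPD | WBT_TRACKED; a discard gets
 * WBT_DISCARD | WBT_TRACKED; and a REQ_SYNC | REQ_IDLE write (the
 * WRITE_ODIRECT case mentioned above) gets no flags at all, since
 * wbt_should_throttle() rejects it.
 */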
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) 	enum wbt_flags flags = bio_to_wbt_flags(rwb, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) 	__wbt_done(rqos, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)  * Throttle a bio that we are going to account (tracked buffered writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)  * and discards). May sleep, if we have exceeded the writeback limits:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)  * the task then waits on the matching rq_wait queue until enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570)  * in-flight IO completes to bring us back under the current limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 	enum wbt_flags flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) 	flags = bio_to_wbt_flags(rwb, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) 	if (!(flags & WBT_TRACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) 		if (flags & WBT_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) 			wb_timestamp(rwb, &rwb->last_issue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) 	__wbt_wait(rwb, flags, bio->bi_opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) 	if (!blk_stat_is_active(rwb->cb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) 		rwb_arm_timer(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) 	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) static void wbt_issue(struct rq_qos *rqos, struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) 	if (!rwb_enabled(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) 	 * Track sync issue, in case it takes a long time to complete. Allows us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) 	 * to react more quickly if a sync IO stalls on a flooded device. Note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) 	 * that this is just a hint. The request can go away when it completes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) 	 * so it's important we never dereference it. We only use the address to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) 	 * compare with, which is why we store the sync_issue time locally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) 	if (wbt_is_read(rq) && !rwb->sync_issue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) 		rwb->sync_cookie = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) 		rwb->sync_issue = rq->io_start_time_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) 	if (!rwb_enabled(rwb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 	if (rq == rwb->sync_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 		rwb->sync_issue = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) 		rwb->sync_cookie = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) 	struct rq_qos *rqos = wbt_rq_qos(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) 	if (rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) 		RQWB(rqos)->wc = write_cache_on;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635)  * Enable wbt if defaults are configured that way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) void wbt_enable_default(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) 	struct rq_qos *rqos = wbt_rq_qos(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) 	/* Throttling already enabled? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) 	if (rqos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) 		if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) 			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) 	/* Queue not registered? Maybe shutting down... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) 	if (!blk_queue_registered(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) 	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) 		wbt_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) EXPORT_SYMBOL_GPL(wbt_enable_default);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) u64 wbt_default_latency_nsec(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) 	 * We default to 2msec for non-rotational storage, and 75msec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) 	 * for rotational storage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) 	if (blk_queue_nonrot(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) 		return 2000000ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) 		return 75000000ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) }
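
/*
 * These defaults (2 msec for non-rotational, 75 msec for rotational
 * devices) seed min_lat_nsec at wbt_init() time; they can normally be
 * read or overridden, in microseconds, via the queue's wbt_lat_usec
 * sysfs attribute (e.g. /sys/block/<dev>/queue/wbt_lat_usec, assumed
 * path), which ends up in wbt_set_min_lat() above.
 */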
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) static int wbt_data_dir(const struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) 	const int op = req_op(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) 	if (op == REQ_OP_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) 		return READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) 	else if (op_is_write(op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) 		return WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) 	/* don't account */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) static void wbt_queue_depth_changed(struct rq_qos *rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) 	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) 	wbt_update_limits(RQWB(rqos));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) static void wbt_exit(struct rq_qos *rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) 	struct request_queue *q = rqos->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) 	blk_stat_remove_callback(q, rwb->cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) 	blk_stat_free_callback(rwb->cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) 	kfree(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699)  * Disable wbt, if enabled by default.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) void wbt_disable_default(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) 	struct rq_qos *rqos = wbt_rq_qos(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) 	struct rq_wb *rwb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) 	if (!rqos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) 	rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) 	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) 		blk_stat_deactivate(rwb->cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) 		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) EXPORT_SYMBOL_GPL(wbt_disable_default);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) #ifdef CONFIG_BLK_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) 	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) static int wbt_enabled_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) 	seq_printf(m, "%d\n", rwb->enable_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) static int wbt_id_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) 	seq_printf(m, "%u\n", rqos->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) static int wbt_inflight_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) 	for (i = 0; i < WBT_NUM_RWQ; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) 		seq_printf(m, "%d: inflight %d\n", i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) 			   atomic_read(&rwb->rq_wait[i].inflight));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) 	seq_printf(m, "%lu\n", rwb->min_lat_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) 	seq_printf(m, "%u\n", rwb->unknown_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) static int wbt_normal_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) 	seq_printf(m, "%u\n", rwb->wb_normal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) static int wbt_background_show(void *data, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) 	struct rq_qos *rqos = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) 	struct rq_wb *rwb = RQWB(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) 	seq_printf(m, "%u\n", rwb->wb_background);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) 	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) 	{"enabled", 0400, wbt_enabled_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) 	{"id", 0400, wbt_id_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) 	{"inflight", 0400, wbt_inflight_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) 	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) 	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) 	{"wb_normal", 0400, wbt_normal_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) 	{"wb_background", 0400, wbt_background_show},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) 	{},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) static struct rq_qos_ops wbt_rqos_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) 	.throttle = wbt_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) 	.issue = wbt_issue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) 	.track = wbt_track,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) 	.requeue = wbt_requeue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) 	.done = wbt_done,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) 	.cleanup = wbt_cleanup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) 	.queue_depth_changed = wbt_queue_depth_changed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) 	.exit = wbt_exit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) #ifdef CONFIG_BLK_DEBUG_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) 	.debugfs_attrs = wbt_debugfs_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) int wbt_init(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) 	struct rq_wb *rwb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) 	if (!rwb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) 	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) 	if (!rwb->cb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) 		kfree(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) 	for (i = 0; i < WBT_NUM_RWQ; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) 		rq_wait_init(&rwb->rq_wait[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) 	rwb->rqos.id = RQ_QOS_WBT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) 	rwb->rqos.ops = &wbt_rqos_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) 	rwb->rqos.q = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) 	rwb->last_comp = rwb->last_issue = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) 	rwb->win_nsec = RWB_WINDOW_NSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) 	rwb->wc = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) 	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) 	wbt_update_limits(rwb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) 	 * Assign rwb and add the stats callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) 	rq_qos_add(q, &rwb->rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) 	blk_stat_add_callback(q, rwb->cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) 	wbt_queue_depth_changed(&rwb->rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) 	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
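
/*
 * Rough lifecycle sketch, pieced together from the rq_qos ops table above:
 * wbt_enable_default() is called when the queue is registered and invokes
 * wbt_init(); afterwards the rq_qos core calls wbt_wait() when a bio is
 * submitted (possibly sleeping on the per-class rq_wait), wbt_track() when
 * the bio is attached to a request, wbt_issue() when the request goes to
 * the driver, and wbt_done() on completion, which drops the inflight count
 * and wakes throttled writers via wbt_rqw_done().
 */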