// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos base io controller
 *
 * This works similarly to wbt with a few exceptions:
 *
 * - It's bio based, so the latency covers the whole block layer in addition to
 *   the actual io.
 * - We will throttle all IO that comes in here if we need to.
 * - We use the mean latency over the 100ms window.  This is because writes can
 *   be particularly fast, which could give us a false sense of the impact of
 *   other workloads on our protected workload.
 * - By default there's no throttling, we set the queue_depth to UINT_MAX so
 *   that we can have as many outstanding bio's as we're allowed to.  Only at
 *   throttle time do we pay attention to the actual queue depth.
 *
 * The hierarchy works like the cpu controller does, we track the latency at
 * every configured node, and each configured node has its own independent
 * queue depth.  This means that we only care about our latency targets at the
 * peer level.  Some group at the bottom of the hierarchy isn't going to affect
 * a group at the end of some other path if we're only configured at leaf level.
 *
 * Consider the following
 *
 *                          root blkg
 *                  /                    \
 *        fast (target=5ms)         slow (target=10ms)
 *          /       \                 /         \
 *        a          b          normal(15ms)   unloved
 *
 * "a" and "b" have no target, but their combined io under "fast" cannot exceed
 * an average latency of 5ms.  If it does, then we will throttle the "slow"
 * group.  In the case of "normal", if it exceeds its 15ms target, we will
 * throttle "unloved", but nobody else.
 *
 * In this example "fast", "slow", and "normal" will be the only groups actually
 * accounting their io latencies.  We have to walk up the hierarchy to the root
 * on every submit and complete so we can do the appropriate stat recording and
 * adjust our own queue depth if needed.
 *
 * There are 2 ways we throttle IO.
 *
 * 1) Queue depth throttling.  As we throttle down we will adjust the maximum
 * number of IO's we're allowed to have in flight.  This starts at UINT_MAX and
 * goes down to 1.  If the group is only ever submitting IO for itself then
 * this is the only way we throttle.
 *
 * 2) Induced delay throttling.  This is for the case that a group is generating
 * IO that has to be issued by the root cg to avoid priority inversion.  So
 * think REQ_META or REQ_SWAP.  If we are already at qd == 1 and we're getting a
 * lot of work done for us on behalf of the root cg and are being asked to scale
 * down more, then we induce a delay at userspace return.  We accumulate the
 * total amount of time we need to be punished by doing
 *
 *   total_time += min_lat_nsec - actual_io_completion
 *
 * and then at throttle time will do
 *
 *   throttle_time = min(total_time, NSEC_PER_SEC)
 *
 * This induced delay will throttle back the activity that is generating the
 * root cg issued io's, whether that's some metadata intensive operation or the
 * group is using so much memory that it is pushing us into swap.
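 *
 * As a rough example of that accounting (numbers are illustrative only): with
 * a 5ms target, an IO done on our behalf by the root cg that completes in 2ms
 * adds 3ms to total_time, and even if total_time has grown to several seconds
 * the delay charged at any one throttle point is still capped at NSEC_PER_SEC.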
 *
 * Copyright (C) 2018 Josef Bacik
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/memcontrol.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include <linux/blk-mq.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk.h"

#define DEFAULT_SCALE_COOKIE 1000000U

static struct blkcg_policy blkcg_policy_iolatency;
struct iolatency_grp;

struct blk_iolatency {
	struct rq_qos rqos;
	struct timer_list timer;
	atomic_t enabled;
};

static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
{
	return container_of(rqos, struct blk_iolatency, rqos);
}

static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
{
	return atomic_read(&blkiolat->enabled) > 0;
}

struct child_latency_info {
	spinlock_t lock;

	/* Last time we adjusted the scale of everybody. */
	u64 last_scale_event;

	/* The latency that we missed. */
	u64 scale_lat;

	/* Total io's from all of our children for the last summation. */
	u64 nr_samples;

	/* The guy who actually changed the latency numbers. */
	struct iolatency_grp *scale_grp;

	/* Cookie to tell if we need to scale up or down. */
	atomic_t scale_cookie;
};

struct percentile_stats {
	u64 total;
	u64 missed;
};

struct latency_stat {
	union {
		struct percentile_stats ps;
		struct blk_rq_stat rqs;
	};
};

struct iolatency_grp {
	struct blkg_policy_data pd;
	struct latency_stat __percpu *stats;
	struct latency_stat cur_stat;
	struct blk_iolatency *blkiolat;
	struct rq_depth rq_depth;
	struct rq_wait rq_wait;
	atomic64_t window_start;
	atomic_t scale_cookie;
	u64 min_lat_nsec;
	u64 cur_win_nsec;

	/* total running average of our io latency. */
	u64 lat_avg;

	/* Our current number of IO's for the last summation. */
	u64 nr_samples;

	bool ssd;
	struct child_latency_info child_lat;
};

#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
/*
 * These are the constants used to fake the fixed-point moving average
 * calculation just like load average.  The call to calc_load() folds
 * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
 * window size is bucketed to try to approximately calculate average
 * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
 * elapse immediately.  Note, windows only elapse with IO activity.  Idle
 * periods extend the most recent window.
 */
#define BLKIOLATENCY_NR_EXP_FACTORS 5
#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \
				      (BLKIOLATENCY_NR_EXP_FACTORS - 1))
static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = {
	2045, // exp(1/600) - 600 samples
	2039, // exp(1/240) - 240 samples
	2031, // exp(1/120) - 120 samples
	2023, // exp(1/80) - 80 samples
	2014, // exp(1/60) - 60 samples
};
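/*
 * Roughly speaking (see calc_load() in <linux/sched/loadavg.h>), each elapsed
 * window updates the running average as:
 *
 *	lat_avg = (lat_avg * exp + mean * (FIXED_1 - exp)) / FIXED_1
 *
 * so with exp = 2045, for example, a single 1ms mean sample pulls an average
 * of 0 up by roughly 1ms * 3/2048, i.e. about 1.5us per elapsed window.
 */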

static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
}

static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
{
	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
}

static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
{
	return pd_to_blkg(&iolat->pd);
}

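/*
 * Latency is tracked differently depending on the device: on SSDs we count
 * total vs. missed IOs against the target (a percentile), on rotational
 * devices we track the mean completion time via blk_rq_stat.  These helpers
 * hide that difference from the rest of the code.
 */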
static inline void latency_stat_init(struct iolatency_grp *iolat,
				     struct latency_stat *stat)
{
	if (iolat->ssd) {
		stat->ps.total = 0;
		stat->ps.missed = 0;
	} else
		blk_rq_stat_init(&stat->rqs);
}

static inline void latency_stat_sum(struct iolatency_grp *iolat,
				    struct latency_stat *sum,
				    struct latency_stat *stat)
{
	if (iolat->ssd) {
		sum->ps.total += stat->ps.total;
		sum->ps.missed += stat->ps.missed;
	} else
		blk_rq_stat_sum(&sum->rqs, &stat->rqs);
}

static inline void latency_stat_record_time(struct iolatency_grp *iolat,
					    u64 req_time)
{
	struct latency_stat *stat = get_cpu_ptr(iolat->stats);
	if (iolat->ssd) {
		if (req_time >= iolat->min_lat_nsec)
			stat->ps.missed++;
		stat->ps.total++;
	} else
		blk_rq_stat_add(&stat->rqs, req_time);
	put_cpu_ptr(stat);
}

static inline bool latency_sum_ok(struct iolatency_grp *iolat,
				  struct latency_stat *stat)
{
	if (iolat->ssd) {
		u64 thresh = div64_u64(stat->ps.total, 10);
		thresh = max(thresh, 1ULL);
		return stat->ps.missed < thresh;
	}
	return stat->rqs.mean <= iolat->min_lat_nsec;
}

static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
				       struct latency_stat *stat)
{
	if (iolat->ssd)
		return stat->ps.total;
	return stat->rqs.nr_samples;
}

static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
					      struct latency_stat *stat)
{
	int exp_idx;

	if (iolat->ssd)
		return;

	/*
	 * calc_load() takes in a number stored in fixed point representation.
	 * Because we are using this for IO time in ns, the values stored
	 * are significantly larger than the FIXED_1 denominator (2048).
	 * Therefore, rounding errors in the calculation are negligible and
	 * can be ignored.
	 */
	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
			div64_u64(iolat->cur_win_nsec,
				  BLKIOLATENCY_EXP_BUCKET_SIZE));
	iolat->lat_avg = calc_load(iolat->lat_avg,
				   iolatency_exp_factors[exp_idx],
				   stat->rqs.mean);
}

static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	atomic_dec(&rqw->inflight);
	wake_up(&rqw->wait);
}

static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
{
	struct iolatency_grp *iolat = private_data;
	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
}

static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
				       struct iolatency_grp *iolat,
				       bool issue_as_root,
				       bool use_memdelay)
{
	struct rq_wait *rqw = &iolat->rq_wait;
	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);

	if (use_delay)
		blkcg_schedule_throttle(rqos->q, use_memdelay);

	/*
	 * To avoid priority inversions we want to just take a slot if we are
	 * issuing as root.  If we're being killed off there's no point in
	 * delaying things, we may have been killed by OOM so throttling may
	 * make recovery take even longer, so just let the IO's through so the
	 * task can go away.
	 */
	if (issue_as_root || fatal_signal_pending(current)) {
		atomic_inc(&rqw->inflight);
		return;
	}

	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
}

#define SCALE_DOWN_FACTOR 2
#define SCALE_UP_FACTOR 4

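/*
 * One scaling step is 1/4 of the device queue depth when scaling down and
 * 1/16 of it when scaling up, but never less than 1, so pressure is relieved
 * quickly and restored more cautiously.
 */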
static inline unsigned long scale_amount(unsigned long qd, bool up)
{
	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
}

/*
 * We scale the qd down faster than we scale up, so we need to use this helper
 * to adjust the scale_cookie accordingly so we don't prematurely get
 * scale_cookie at DEFAULT_SCALE_COOKIE and unthrottle too much.
 *
 * Each group has its own local copy of the last scale cookie it saw, so if
 * the global scale cookie goes up or down it knows which way it needs to go
 * based on its last knowledge of it.
 */
static void scale_cookie_change(struct blk_iolatency *blkiolat,
				struct child_latency_info *lat_info,
				bool up)
{
	unsigned long qd = blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = atomic_read(&lat_info->scale_cookie);
	unsigned long max_scale = qd << 1;
	unsigned long diff = 0;

	if (old < DEFAULT_SCALE_COOKIE)
		diff = DEFAULT_SCALE_COOKIE - old;

	if (up) {
		if (scale + old > DEFAULT_SCALE_COOKIE)
			atomic_set(&lat_info->scale_cookie,
				   DEFAULT_SCALE_COOKIE);
		else if (diff > qd)
			atomic_inc(&lat_info->scale_cookie);
		else
			atomic_add(scale, &lat_info->scale_cookie);
	} else {
		/*
		 * We don't want to dig a hole so deep that it takes us hours to
		 * dig out of it.  Just enough that we don't throttle/unthrottle
		 * with jagged workloads but can still unthrottle once pressure
		 * has sufficiently dissipated.
		 */
		if (diff > qd) {
			if (diff < max_scale)
				atomic_dec(&lat_info->scale_cookie);
		} else {
			atomic_sub(scale, &lat_info->scale_cookie);
		}
	}
}

/*
 * Change the queue depth of the iolatency_grp.  We add 1/16th of the queue
 * depth at a time when scaling up and halve the current depth when scaling
 * down, so we don't get wild swings and hopefully dial in to a fairer
 * distribution of the overall queue depth.
 */
static void scale_change(struct iolatency_grp *iolat, bool up)
{
	unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = iolat->rq_depth.max_depth;

	if (old > qd)
		old = qd;

	if (up) {
		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
			return;

		if (old < qd) {
			old += scale;
			old = min(old, qd);
			iolat->rq_depth.max_depth = old;
			wake_up_all(&iolat->rq_wait.wait);
		}
	} else {
		old >>= 1;
		iolat->rq_depth.max_depth = max(old, 1UL);
	}
}

/* Check our parent and see if the scale cookie has changed. */
static void check_scale_change(struct iolatency_grp *iolat)
{
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	unsigned int cur_cookie;
	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
	u64 scale_lat;
	unsigned int old;
	int direction = 0;

	if (lat_to_blkg(iolat)->parent == NULL)
		return;

	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;
	cur_cookie = atomic_read(&lat_info->scale_cookie);
	scale_lat = READ_ONCE(lat_info->scale_lat);

	if (cur_cookie < our_cookie)
		direction = -1;
	else if (cur_cookie > our_cookie)
		direction = 1;
	else
		return;

	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);

	/* Somebody beat us to the punch, just bail. */
	if (old != our_cookie)
		return;

	if (direction < 0 && iolat->min_lat_nsec) {
		u64 samples_thresh;

		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
			return;

		/*
		 * Sometimes high priority groups are their own worst enemy, so
		 * instead of taking it out on some poor other group that did 5%
		 * or less of the IO's for the last summation just skip this
		 * scale down event.
		 */
		samples_thresh = lat_info->nr_samples * 5;
		samples_thresh = max(1ULL, div64_u64(samples_thresh, 100));
		if (iolat->nr_samples <= samples_thresh)
			return;
	}

	/* We're as low as we can go. */
	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
		blkcg_use_delay(lat_to_blkg(iolat));
		return;
	}

	/* We're back to the default cookie, unthrottle all the things. */
	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
		blkcg_clear_delay(lat_to_blkg(iolat));
		iolat->rq_depth.max_depth = UINT_MAX;
		wake_up_all(&iolat->rq_wait.wait);
		return;
	}

	scale_change(iolat, direction > 0);
}

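/*
 * rq_qos ->throttle hook.  Walk from the bio's blkg up towards the root,
 * react to any scale cookie change at each level and take (or wait for) an
 * inflight slot wherever iolatency is configured.
 */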
static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
	struct blkcg_gq *blkg = bio->bi_blkg;
	bool issue_as_root = bio_issue_as_root_blkg(bio);

	if (!blk_iolatency_enabled(blkiolat))
		return;

	while (blkg && blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}

		check_scale_change(iolat);
		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
					   (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		blkg = blkg->parent;
	}
	if (!timer_pending(&blkiolat->timer))
		mod_timer(&blkiolat->timer, jiffies + HZ);
}

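/*
 * Account one completed bio against the group's per-cpu latency stats.  While
 * we are actually being throttled (max_depth != UINT_MAX), bios issued on
 * behalf of the root blkg are not recorded; if such a bio beats our target we
 * instead add the difference to the group's delay budget (see part 2 of the
 * comment at the top of this file).
 */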
static void iolatency_record_time(struct iolatency_grp *iolat,
				  struct bio_issue *issue, u64 now,
				  bool issue_as_root)
{
	u64 start = bio_issue_time(issue);
	u64 req_time;

	/*
	 * Have to do this so we are truncated to the same granularity that the
	 * issue time of the bio was truncated to.
	 */
	now = __bio_issue_time(now);

	if (now <= start)
		return;

	req_time = now - start;

	/*
	 * We don't want to count issue_as_root bio's in the cgroup's latency
	 * statistics as it could skew the numbers downwards.
	 */
	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != UINT_MAX)) {
		u64 sub = iolat->min_lat_nsec;
		if (req_time < sub)
			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
		return;
	}

	latency_stat_record_time(iolat, req_time);
}

#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MIN_GOOD_SAMPLES 5

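/*
 * Called once per elapsed window.  Fold the per-cpu stats into one sum,
 * update the running average, and then decide under the parent's lock whether
 * the parent's scale cookie should move back up (we are meeting our target)
 * or further down (we are missing it).
 */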
static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
{
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	struct latency_stat stat;
	unsigned long flags;
	int cpu;

	latency_stat_init(iolat, &stat);
	preempt_disable();
	for_each_online_cpu(cpu) {
		struct latency_stat *s;
		s = per_cpu_ptr(iolat->stats, cpu);
		latency_stat_sum(iolat, &stat, s);
		latency_stat_init(iolat, s);
	}
	preempt_enable();

	parent = blkg_to_lat(blkg->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;

	iolat_update_total_lat_avg(iolat, &stat);

	/* Everything is ok and we don't need to adjust the scale. */
	if (latency_sum_ok(iolat, &stat) &&
	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
		return;

	/* Somebody beat us to the punch, just bail. */
	spin_lock_irqsave(&lat_info->lock, flags);

	latency_stat_sum(iolat, &iolat->cur_stat, &stat);
	lat_info->nr_samples -= iolat->nr_samples;
	lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat);
	iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat);

	if ((lat_info->last_scale_event >= now ||
	     now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME))
		goto out;

	if (latency_sum_ok(iolat, &iolat->cur_stat) &&
	    latency_sum_ok(iolat, &stat)) {
		if (latency_stat_samples(iolat, &iolat->cur_stat) <
		    BLKIOLATENCY_MIN_GOOD_SAMPLES)
			goto out;
		if (lat_info->scale_grp == iolat) {
			lat_info->last_scale_event = now;
			scale_cookie_change(iolat->blkiolat, lat_info, true);
		}
	} else if (lat_info->scale_lat == 0 ||
		   lat_info->scale_lat >= iolat->min_lat_nsec) {
		lat_info->last_scale_event = now;
		if (!lat_info->scale_grp ||
		    lat_info->scale_lat > iolat->min_lat_nsec) {
			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
			lat_info->scale_grp = iolat;
		}
		scale_cookie_change(iolat->blkiolat, lat_info, false);
	}
	latency_stat_init(iolat, &iolat->cur_stat);
out:
	spin_unlock_irqrestore(&lat_info->lock, flags);
}

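/*
 * rq_qos ->done_bio hook.  Walk back up the hierarchy releasing the inflight
 * slots taken at submit time, record the completion latency at each level and
 * kick the window accounting once the current window has elapsed.
 */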
static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	struct blkcg_gq *blkg;
	struct rq_wait *rqw;
	struct iolatency_grp *iolat;
	u64 window_start;
	u64 now;
	bool issue_as_root = bio_issue_as_root_blkg(bio);
	bool enabled = false;
	int inflight = 0;

	blkg = bio->bi_blkg;
	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
		return;

	iolat = blkg_to_lat(bio->bi_blkg);
	if (!iolat)
		return;

	enabled = blk_iolatency_enabled(iolat->blkiolat);
	if (!enabled)
		return;

	now = ktime_to_ns(ktime_get());
	while (blkg && blkg->parent) {
		iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}
		rqw = &iolat->rq_wait;

		inflight = atomic_dec_return(&rqw->inflight);
		WARN_ON_ONCE(inflight < 0);
		/*
		 * If bi_status is BLK_STS_AGAIN, the bio wasn't actually
		 * submitted, so do not account for it.
		 */
		if (iolat->min_lat_nsec && bio->bi_status != BLK_STS_AGAIN) {
			iolatency_record_time(iolat, &bio->bi_issue, now,
					      issue_as_root);
			window_start = atomic64_read(&iolat->window_start);
			if (now > window_start &&
			    (now - window_start) >= iolat->cur_win_nsec) {
				if (atomic64_cmpxchg(&iolat->window_start,
					     window_start, now) == window_start)
					iolatency_check_latencies(iolat, now);
			}
		}
		wake_up(&rqw->wait);
		blkg = blkg->parent;
	}
}

static void blkcg_iolatency_exit(struct rq_qos *rqos)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);

	del_timer_sync(&blkiolat->timer);
	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
	kfree(blkiolat);
}

static struct rq_qos_ops blkcg_iolatency_ops = {
	.throttle = blkcg_iolatency_throttle,
	.done_bio = blkcg_iolatency_done_bio,
	.exit = blkcg_iolatency_exit,
};

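/*
 * Periodic timer, armed for one second from the throttle path.  For any
 * parent that is still scaled down, keep nudging things back towards
 * DEFAULT_SCALE_COOKIE even if the group that caused the scale down has gone
 * idle in the meantime.
 */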
static void blkiolatency_timer_fn(struct timer_list *t)
{
	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
	struct blkcg_gq *blkg;
	struct cgroup_subsys_state *pos_css;
	u64 now = ktime_to_ns(ktime_get());

	rcu_read_lock();
	blkg_for_each_descendant_pre(blkg, pos_css,
				     blkiolat->rqos.q->root_blkg) {
		struct iolatency_grp *iolat;
		struct child_latency_info *lat_info;
		unsigned long flags;
		u64 cookie;

		/*
		 * We could be exiting, don't access the pd unless we have a
		 * ref on the blkg.
		 */
		if (!blkg_tryget(blkg))
			continue;

		iolat = blkg_to_lat(blkg);
		if (!iolat)
			goto next;

		lat_info = &iolat->child_lat;
		cookie = atomic_read(&lat_info->scale_cookie);

		if (cookie >= DEFAULT_SCALE_COOKIE)
			goto next;

		spin_lock_irqsave(&lat_info->lock, flags);
		if (lat_info->last_scale_event >= now)
			goto next_lock;

		/*
		 * We scaled down but don't have a scale_grp, scale up and carry
		 * on.
		 */
		if (lat_info->scale_grp == NULL) {
			scale_cookie_change(iolat->blkiolat, lat_info, true);
			goto next_lock;
		}

		/*
		 * It's been 5 seconds since our last scale event, clear the
		 * scale grp in case the group that needed the scale down isn't
		 * doing any IO currently.
		 */
		if (now - lat_info->last_scale_event >=
		    ((u64)NSEC_PER_SEC * 5))
			lat_info->scale_grp = NULL;
next_lock:
		spin_unlock_irqrestore(&lat_info->lock, flags);
next:
		blkg_put(blkg);
	}
	rcu_read_unlock();
}

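/*
 * Allocate the blk_iolatency structure, register it as an rq_qos policy on
 * the queue and activate the iolatency blkcg policy for it.
 */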
int blk_iolatency_init(struct request_queue *q)
{
	struct blk_iolatency *blkiolat;
	struct rq_qos *rqos;
	int ret;

	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
	if (!blkiolat)
		return -ENOMEM;

	rqos = &blkiolat->rqos;
	rqos->id = RQ_QOS_LATENCY;
	rqos->ops = &blkcg_iolatency_ops;
	rqos->q = q;

	rq_qos_add(q, rqos);

	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
	if (ret) {
		rq_qos_del(q, rqos);
		kfree(blkiolat);
		return ret;
	}

	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);

	return 0;
}

/*
 * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
 * return 0.
 */
static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
	struct iolatency_grp *iolat = blkg_to_lat(blkg);
	u64 oldval = iolat->min_lat_nsec;

	iolat->min_lat_nsec = val;
	iolat->cur_win_nsec = max_t(u64, val << 4, BLKIOLATENCY_MIN_WIN_SIZE);
	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
				    BLKIOLATENCY_MAX_WIN_SIZE);

	if (!oldval && val)
		return 1;
	if (oldval && !val) {
		blkcg_clear_delay(blkg);
		return -1;
	}
	return 0;
}

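/*
 * Reset the parent's scaling state (scale cookie, scale_lat and scale_grp)
 * back to defaults so that stale values from a previous configuration can't
 * keep sibling groups throttled.
 */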
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
{
	if (blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
		struct child_latency_info *lat_info;
		if (!iolat)
			return;

		lat_info = &iolat->child_lat;
		spin_lock(&lat_info->lock);
		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
		lat_info->last_scale_event = 0;
		lat_info->scale_grp = NULL;
		lat_info->scale_lat = 0;
		spin_unlock(&lat_info->lock);
	}
}

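/*
 * Handler for writes to the cgroup "io.latency" file.  Parses a
 * "target=<usecs>" (or "target=max") token for one device, updates
 * min_lat_nsec and the sampling window, and enables or disables the
 * controller on the queue as needed.
 */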
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) struct blkcg *blkcg = css_to_blkcg(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) struct blkg_conf_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) struct iolatency_grp *iolat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) char *p, *tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) u64 lat_val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) u64 oldval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) int enable = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) iolat = blkg_to_lat(ctx.blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) p = ctx.body;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) while ((tok = strsep(&p, " "))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) char key[16];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) char val[21]; /* 18446744073709551615 (U64_MAX) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (!strcmp(key, "target")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) u64 v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (!strcmp(val, "max"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) lat_val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) else if (sscanf(val, "%llu", &v) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) lat_val = v * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) /* Remember the old target so we know whether scaling state needs a reset. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) blkg = ctx.blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) oldval = iolat->min_lat_nsec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) enable = iolatency_set_min_lat_nsec(blkg, lat_val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (enable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) if (!blk_get_queue(blkg->q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) blkg_get(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (oldval != iolat->min_lat_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) iolatency_clear_scaling(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) blkg_conf_finish(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (ret == 0 && enable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) struct iolatency_grp *tmp = blkg_to_lat(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) struct blk_iolatency *blkiolat = tmp->blkiolat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) blk_mq_freeze_queue(blkg->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (enable == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) atomic_inc(&blkiolat->enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) else if (enable == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) atomic_dec(&blkiolat->enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) blk_mq_unfreeze_queue(blkg->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) blkg_put(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) blk_put_queue(blkg->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
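/*
 * Print one "MAJOR:MINOR target=<usec>" line per device; groups that have
 * no latency target configured are skipped.
 */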
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) static u64 iolatency_prfill_limit(struct seq_file *sf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) struct blkg_policy_data *pd, int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) struct iolatency_grp *iolat = pd_to_lat(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) const char *dname = blkg_dev_name(pd->blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (!dname || !iolat->min_lat_nsec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) seq_printf(sf, "%s target=%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) dname, div_u64(iolat->min_lat_nsec, NSEC_PER_USEC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) static int iolatency_print_limit(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) iolatency_prfill_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) &blkcg_policy_iolatency, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
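/*
 * Debug statistics for the nonrotational (percentile) path: fold the
 * per-cpu latency_stat counters into a single sum and report how many
 * requests missed the target out of the total seen, plus the current
 * queue depth limit ("max" while we are not throttling).
 */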
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct latency_stat stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) latency_stat_init(iolat, &stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) struct latency_stat *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) s = per_cpu_ptr(iolat->stats, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) latency_stat_sum(iolat, &stat, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (iolat->rq_depth.max_depth == UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) (unsigned long long)stat.ps.missed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) (unsigned long long)stat.ps.total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) (unsigned long long)stat.ps.missed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) (unsigned long long)stat.ps.total,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) iolat->rq_depth.max_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
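/*
 * Per-group debug line shown in io.stat when blkcg_debug_stats is set.
 * Rotational devices report the running average latency (usecs) and the
 * current sampling window (msecs); nonrotational devices use
 * iolatency_ssd_stat() above.
 */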
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) struct iolatency_grp *iolat = pd_to_lat(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) unsigned long long avg_lat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) unsigned long long cur_win;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) if (!blkcg_debug_stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) if (iolat->ssd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) return iolatency_ssd_stat(iolat, buf, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (iolat->rq_depth.max_depth == UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) avg_lat, cur_win);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) iolat->rq_depth.max_depth, avg_lat, cur_win);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
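/*
 * Allocate the per-(cgroup, queue) policy data on the queue's home node
 * along with its per-cpu latency_stat buffer; everything else is set up in
 * iolatency_pd_init() once the blkg is created.
 */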
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) struct blkcg *blkcg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) struct iolatency_grp *iolat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) if (!iolat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) __alignof__(struct latency_stat), gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (!iolat->stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) kfree(iolat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return &iolat->pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
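/*
 * Initialize a freshly allocated group: pick the ssd vs rotational stat
 * flavour from the queue, start with an unlimited depth and a 100ms
 * sampling window, and inherit the parent's scale cookie if the parent has
 * already been initialized.
 */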
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) static void iolatency_pd_init(struct blkg_policy_data *pd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) struct iolatency_grp *iolat = pd_to_lat(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) struct blkcg_gq *blkg = lat_to_blkg(iolat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) u64 now = ktime_to_ns(ktime_get());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (blk_queue_nonrot(blkg->q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) iolat->ssd = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) iolat->ssd = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) struct latency_stat *stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) stat = per_cpu_ptr(iolat->stats, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) latency_stat_init(iolat, stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) latency_stat_init(iolat, &iolat->cur_stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) rq_wait_init(&iolat->rq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) spin_lock_init(&iolat->child_lat.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) iolat->rq_depth.queue_depth = blkg->q->nr_requests;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) iolat->rq_depth.max_depth = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) iolat->blkiolat = blkiolat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) atomic64_set(&iolat->window_start, now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * Policy data is initialized in list order, so the pd for the parent may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * not be initialized yet; fall back to the default scale cookie then.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) atomic_set(&iolat->scale_cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) atomic_read(&parent->child_lat.scale_cookie));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
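/*
 * Offlining is treated like clearing the target: drop min_lat_nsec to 0,
 * adjust the enabled count accordingly and reset the parent's scaling
 * state.
 */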
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) static void iolatency_pd_offline(struct blkg_policy_data *pd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) struct iolatency_grp *iolat = pd_to_lat(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) struct blkcg_gq *blkg = lat_to_blkg(iolat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) struct blk_iolatency *blkiolat = iolat->blkiolat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) ret = iolatency_set_min_lat_nsec(blkg, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) if (ret == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) atomic_inc(&blkiolat->enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (ret == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) atomic_dec(&blkiolat->enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) iolatency_clear_scaling(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) static void iolatency_pd_free(struct blkg_policy_data *pd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) struct iolatency_grp *iolat = pd_to_lat(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) free_percpu(iolat->stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) kfree(iolat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) static struct cftype iolatency_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) .name = "latency",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) .seq_show = iolatency_print_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) .write = iolatency_set_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) static struct blkcg_policy blkcg_policy_iolatency = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) .dfl_cftypes = iolatency_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) .pd_alloc_fn = iolatency_pd_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) .pd_init_fn = iolatency_pd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) .pd_offline_fn = iolatency_pd_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) .pd_free_fn = iolatency_pd_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) .pd_stat_fn = iolatency_pd_stat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) static int __init iolatency_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) return blkcg_policy_register(&blkcg_policy_iolatency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) static void __exit iolatency_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) blkcg_policy_unregister(&blkcg_policy_iolatency);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) module_init(iolatency_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) module_exit(iolatency_exit);