// SPDX-License-Identifier: GPL-2.0
/*
 * Interface for controlling IO bandwidth on a request queue
 *
 * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"

/* Max dispatch from a group in 1 round */
#define THROTL_GRP_QUANTUM 8

/* Total max dispatch from all groups in one round */
#define THROTL_QUANTUM 32

/* Throttling is performed over a slice and after that the slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50)
#define MAX_THROTL_SLICE (HZ)
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
#define LATENCY_FILTERED_SSD (0)
/*
 * For HD, very small latency comes from sequential IO. Such IO doesn't
 * help determine whether its own IO is being impacted by other IO, so we
 * ignore it.
 */
#define LATENCY_FILTERED_HD (1000L) /* 1ms */
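/*
 * These thresholds are compared against measured completion latencies;
 * samples below the device's filtered value are ignored when judging
 * whether a group's latency target is being met.
 */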

static struct blkcg_policy blkcg_policy_throtl;

/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued. When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, a local or
 * child group that queues many bios at once could fill up the list and
 * starve the others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from. When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's. A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
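/*
 * For example, if one child has dispatched a large burst of bios to the
 * parent while the local group has queued only one, popping the per-source
 * qnodes round-robin lets the local bio go out after at most one bio from
 * each other active source instead of waiting behind the whole burst.
 */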
struct throtl_qnode {
        struct list_head node;          /* service_queue->queued[] */
        struct bio_list bios;           /* queued bios */
        struct throtl_grp *tg;          /* tg this qnode belongs to */
};

struct throtl_service_queue {
        struct throtl_service_queue *parent_sq; /* the parent service_queue */

        /*
         * Bios queued directly to this service_queue or dispatched from
         * children throtl_grp's.
         */
        struct list_head queued[2];     /* throtl_qnode [READ/WRITE] */
        unsigned int nr_queued[2];      /* number of queued bios */

        /*
         * RB tree of active children throtl_grp's, which are sorted by
         * their ->disptime.
         */
        struct rb_root_cached pending_tree;     /* RB tree of active tgs */
        unsigned int nr_pending;                /* # queued in the tree */
        unsigned long first_pending_disptime;   /* disptime of the first tg */
        struct timer_list pending_timer;        /* fires on first_pending_disptime */
};

enum tg_state_flags {
        THROTL_TG_PENDING   = 1 << 0,   /* on parent's pending tree */
        THROTL_TG_WAS_EMPTY = 1 << 1,   /* bio_lists[] became non-empty */
};

#define rb_entry_tg(node)  rb_entry((node), struct throtl_grp, rb_node)

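/*
 * Indexes into the per-direction bps[]/iops[] limit arrays below:
 * LIMIT_LOW is the soft ".low" limit, LIMIT_MAX the hard ".max" limit and
 * LIMIT_CNT the number of limits.
 */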
enum {
        LIMIT_LOW,
        LIMIT_MAX,
        LIMIT_CNT,
};

struct throtl_grp {
        /* must be the first member */
        struct blkg_policy_data pd;

        /* active throtl group service_queue member */
        struct rb_node rb_node;

        /* throtl_data this group belongs to */
        struct throtl_data *td;

        /* this group's service queue */
        struct throtl_service_queue service_queue;

        /*
         * qnode_on_self is used when bios are directly queued to this
         * throtl_grp so that local bios compete fairly with bios
         * dispatched from children. qnode_on_parent is used when bios are
         * dispatched from this throtl_grp into its parent and will compete
         * with the sibling qnode_on_parents and the parent's
         * qnode_on_self.
         */
        struct throtl_qnode qnode_on_self[2];
        struct throtl_qnode qnode_on_parent[2];

        /*
         * Dispatch time in jiffies. This is the estimated time when the
         * group will unthrottle and be ready to dispatch more bios. It is
         * used as the key to sort active groups in the service tree.
         */
        unsigned long disptime;

        unsigned int flags;

        /* are there any throtl rules between this group and td? */
        bool has_rules[2];

        /* internally used bytes per second rate limits */
        uint64_t bps[2][LIMIT_CNT];
        /* user configured bps limits */
        uint64_t bps_conf[2][LIMIT_CNT];

        /* internally used IOPS limits */
        unsigned int iops[2][LIMIT_CNT];
        /* user configured IOPS limits */
        unsigned int iops_conf[2][LIMIT_CNT];

        /* Number of bytes dispatched in current slice */
        uint64_t bytes_disp[2];
        /* Number of bio's dispatched in current slice */
        unsigned int io_disp[2];

        unsigned long last_low_overflow_time[2];

        uint64_t last_bytes_disp[2];
        unsigned int last_io_disp[2];

        unsigned long last_check_time;

        unsigned long latency_target;           /* us */
        unsigned long latency_target_conf;      /* us */
        /* When did we start a new slice */
        unsigned long slice_start[2];
        unsigned long slice_end[2];

        unsigned long last_finish_time;         /* ns / 1024 */
        unsigned long checked_last_finish_time; /* ns / 1024 */
        unsigned long avg_idletime;             /* ns / 1024 */
        unsigned long idletime_threshold;       /* us */
        unsigned long idletime_threshold_conf;  /* us */

        unsigned int bio_cnt;           /* total bios */
        unsigned int bad_bio_cnt;       /* bios exceeding latency threshold */
        unsigned long bio_cnt_reset_time;

        atomic_t io_split_cnt[2];
        atomic_t last_io_split_cnt[2];

        struct blkg_rwstat stat_bytes;
        struct blkg_rwstat stat_ios;
};

/* We measure latency for request sizes from <= 4k to >= 1M */
#define LATENCY_BUCKET_SIZE 9

struct latency_bucket {
        unsigned long total_latency;    /* ns / 1024 */
        int samples;
};

struct avg_latency_bucket {
        unsigned long latency;          /* ns / 1024 */
        bool valid;
};

struct throtl_data
{
        /* service tree for active throtl groups */
        struct throtl_service_queue service_queue;

        struct request_queue *queue;

        /* Total Number of queued bios on READ and WRITE lists */
        unsigned int nr_queued[2];

        unsigned int throtl_slice;

        /* Work for dispatching throttled bios */
        struct work_struct dispatch_work;
        unsigned int limit_index;
        bool limit_valid[LIMIT_CNT];

        unsigned long low_upgrade_time;
        unsigned long low_downgrade_time;

        unsigned int scale;

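        /*
         * tmp_buckets accumulate the samples collected from the per-cpu
         * latency_buckets below; avg_buckets hold the smoothed per-bucket
         * latency estimates derived from them.
         */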
        struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
        struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
        struct latency_bucket __percpu *latency_buckets[2];
        unsigned long last_calculate_time;
        unsigned long filtered_latency;

        bool track_bio_latency;
};

static void throtl_pending_timer_fn(struct timer_list *t);

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
        return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
        return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
{
        return pd_to_blkg(&tg->pd);
}

/**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * Return the throtl_grp @sq belongs to. If @sq is the top-level one
 * embedded in throtl_data, %NULL is returned.
 */
static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
{
        if (sq && sq->parent_sq)
                return container_of(sq, struct throtl_grp, service_queue);
        else
                return NULL;
}

/**
 * sq_to_td - return throtl_data the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * A service_queue can be embedded in either a throtl_grp or throtl_data.
 * Determine the associated throtl_data accordingly and return it.
 */
static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
{
        struct throtl_grp *tg = sq_to_tg(sq);

        if (tg)
                return tg->td;
        else
                return container_of(sq, struct throtl_data, service_queue);
}

/*
 * cgroup's limit in LIMIT_MAX is scaled if a low limit is set. This scaling
 * is to make IO dispatch smoother.
 * Scale up: scale up linearly according to elapsed time since the upgrade.
 *           For every throtl_slice, the limit scales up by 1/2 of the .low
 *           limit till it hits the .max limit.
 * Scale down: scale down exponentially if a cgroup doesn't hit its .low limit.
 */
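/*
 * For example, three throtl_slices after an upgrade the scale is 3, so a
 * 100MB/s .low limit is treated as 100 + 50 * 3 = 250MB/s; the callers
 * below still cap the result at the group's .max limit.
 */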
static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
{
        /* arbitrary value to avoid too big scale */
        if (td->scale < 4096 && time_after_eq(jiffies,
            td->low_upgrade_time + td->scale * td->throtl_slice))
                td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;

        return low + (low >> 1) * td->scale;
}

static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
{
        struct blkcg_gq *blkg = tg_to_blkg(tg);
        struct throtl_data *td;
        uint64_t ret;

        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
                return U64_MAX;

        td = tg->td;
        ret = tg->bps[rw][td->limit_index];
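        /*
         * Running at the low limit but no low bps limit is set for this
         * direction: intermediate nodes and groups with a low iops limit
         * stay unlimited, while a leaf with no low limit at all is clamped
         * to MIN_THROTL_BPS so that groups which did configure .low can
         * reach it.
         */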
        if (ret == 0 && td->limit_index == LIMIT_LOW) {
                /* intermediate node or iops isn't 0 */
                if (!list_empty(&blkg->blkcg->css.children) ||
                    tg->iops[rw][td->limit_index])
                        return U64_MAX;
                else
                        return MIN_THROTL_BPS;
        }

        if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
            tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
                uint64_t adjusted;

                adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
                ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
        }
        return ret;
}

static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
{
        struct blkcg_gq *blkg = tg_to_blkg(tg);
        struct throtl_data *td;
        unsigned int ret;

        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
                return UINT_MAX;

        td = tg->td;
        ret = tg->iops[rw][td->limit_index];
        if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
                /* intermediate node or bps isn't 0 */
                if (!list_empty(&blkg->blkcg->css.children) ||
                    tg->bps[rw][td->limit_index])
                        return UINT_MAX;
                else
                        return MIN_THROTL_IOPS;
        }

        if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
            tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
                uint64_t adjusted;

                adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
                if (adjusted > UINT_MAX)
                        adjusted = UINT_MAX;
                ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
        }
        return ret;
}

#define request_bucket_index(sectors) \
        clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
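/*
 * e.g. a 4k request (8 sectors) maps to bucket 0 and a 1M request
 * (2048 sectors) maps to bucket 8, the last one.
 */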

/**
 * throtl_log - log debug message via blktrace
 * @sq: the service_queue being reported
 * @fmt: printf format string
 * @args: printf args
 *
 * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
 * throtl_grp; otherwise, just "throtl".
 */
#define throtl_log(sq, fmt, args...) do { \
        struct throtl_grp *__tg = sq_to_tg((sq)); \
        struct throtl_data *__td = sq_to_td((sq)); \
        \
        (void)__td; \
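        /* keep __td referenced even when the blktrace helpers compile away */ \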
        if (likely(!blk_trace_note_message_enabled(__td->queue))) \
                break; \
        if ((__tg)) { \
                blk_add_cgroup_trace_msg(__td->queue, \
                        tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args); \
        } else { \
                blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
        } \
} while (0)

static inline unsigned int throtl_bio_data_size(struct bio *bio)
{
        /* discards can be huge; account them as a single sector */
        if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
                return 512;
        return bio->bi_iter.bi_size;
}

static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
        INIT_LIST_HEAD(&qn->node);
        bio_list_init(&qn->bios);
        qn->tg = tg;
}

/**
 * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
 * @bio: bio being added
 * @qn: qnode to add bio to
 * @queued: the service_queue->queued[] list @qn belongs to
 *
 * Add @bio to @qn and put @qn on @queued if it's not already on.
 * @qn->tg's reference count is bumped when @qn is activated. See the
 * comment on top of throtl_qnode definition for details.
 */
static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
                                 struct list_head *queued)
{
        bio_list_add(&qn->bios, bio);
        if (list_empty(&qn->node)) {
                list_add_tail(&qn->node, queued);
                blkg_get(tg_to_blkg(qn->tg));
        }
}

/**
 * throtl_peek_queued - peek the first bio on a qnode list
 * @queued: the qnode list to peek
 */
static struct bio *throtl_peek_queued(struct list_head *queued)
{
        struct throtl_qnode *qn;
        struct bio *bio;

        if (list_empty(queued))
                return NULL;

        qn = list_first_entry(queued, struct throtl_qnode, node);
        bio = bio_list_peek(&qn->bios);
        WARN_ON_ONCE(!bio);
        return bio;
}

/**
 * throtl_pop_queued - pop the first bio from a qnode list
 * @queued: the qnode list to pop a bio from
 * @tg_to_put: optional out argument for throtl_grp to put
 *
 * Pop the first bio from the qnode list @queued. After popping, the first
 * qnode is removed from @queued if empty or moved to the end of @queued so
 * that the popping order is round-robin.
 *
 * When the first qnode is removed, its associated throtl_grp should be put
 * too. If @tg_to_put is NULL, this function automatically puts it;
 * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
 * responsible for putting it.
 */
static struct bio *throtl_pop_queued(struct list_head *queued,
                                     struct throtl_grp **tg_to_put)
{
        struct throtl_qnode *qn;
        struct bio *bio;

        if (list_empty(queued))
                return NULL;

        qn = list_first_entry(queued, struct throtl_qnode, node);
        bio = bio_list_pop(&qn->bios);
        WARN_ON_ONCE(!bio);

        if (bio_list_empty(&qn->bios)) {
                list_del_init(&qn->node);
                if (tg_to_put)
                        *tg_to_put = qn->tg;
                else
                        blkg_put(tg_to_blkg(qn->tg));
        } else {
                list_move_tail(&qn->node, queued);
        }

        return bio;
}

/* init a service_queue, assumes the caller zeroed it */
static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
        INIT_LIST_HEAD(&sq->queued[0]);
        INIT_LIST_HEAD(&sq->queued[1]);
        sq->pending_tree = RB_ROOT_CACHED;
        timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}

static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
                                                struct request_queue *q,
                                                struct blkcg *blkcg)
{
        struct throtl_grp *tg;
        int rw;

        tg = kzalloc_node(sizeof(*tg), gfp, q->node);
        if (!tg)
                return NULL;

        if (blkg_rwstat_init(&tg->stat_bytes, gfp))
                goto err_free_tg;

        if (blkg_rwstat_init(&tg->stat_ios, gfp))
                goto err_exit_stat_bytes;

        throtl_service_queue_init(&tg->service_queue);

        for (rw = READ; rw <= WRITE; rw++) {
                throtl_qnode_init(&tg->qnode_on_self[rw], tg);
                throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
        }

        RB_CLEAR_NODE(&tg->rb_node);
        tg->bps[READ][LIMIT_MAX] = U64_MAX;
        tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
        tg->iops[READ][LIMIT_MAX] = UINT_MAX;
        tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
        tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
        tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
        tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
        tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
        /* LIMIT_LOW will have default value 0 */

        tg->latency_target = DFL_LATENCY_TARGET;
        tg->latency_target_conf = DFL_LATENCY_TARGET;
        tg->idletime_threshold = DFL_IDLE_THRESHOLD;
        tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

        return &tg->pd;

err_exit_stat_bytes:
        blkg_rwstat_exit(&tg->stat_bytes);
err_free_tg:
        kfree(tg);
        return NULL;
}

static void throtl_pd_init(struct blkg_policy_data *pd)
{
        struct throtl_grp *tg = pd_to_tg(pd);
        struct blkcg_gq *blkg = tg_to_blkg(tg);
        struct throtl_data *td = blkg->q->td;
        struct throtl_service_queue *sq = &tg->service_queue;

        /*
         * If on the default hierarchy, we switch to properly hierarchical
         * behavior where limits on a given throtl_grp are applied to the
         * whole subtree rather than just the group itself. e.g. if a 16M
         * read_bps limit is set on the root group, the whole system can't
         * exceed 16M for the device.
         *
         * If not on the default hierarchy, the broken flat hierarchy
         * behavior is retained where all throtl_grps are treated as if
         * they're all separate root groups right below throtl_data.
         * Limits of a group don't interact with limits of other groups
         * regardless of the position of the group in the hierarchy.
         */
        sq->parent_sq = &td->service_queue;
        if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
                sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
        tg->td = td;
}

/*
 * Set has_rules[] if @tg or any of its parents have limits configured.
 * This doesn't require walking up to the top of the hierarchy as the
 * parent's has_rules[] is guaranteed to be correct.
 */
static void tg_update_has_rules(struct throtl_grp *tg)
{
        struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
        struct throtl_data *td = tg->td;
        int rw;

        for (rw = READ; rw <= WRITE; rw++)
                tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
                        (td->limit_valid[td->limit_index] &&
                         (tg_bps_limit(tg, rw) != U64_MAX ||
                          tg_iops_limit(tg, rw) != UINT_MAX));
}

static void throtl_pd_online(struct blkg_policy_data *pd)
{
        struct throtl_grp *tg = pd_to_tg(pd);
        /*
         * We don't want new groups to escape the limits of their ancestors.
         * Update has_rules[] after a new group is brought online.
         */
        tg_update_has_rules(tg);
}

static void blk_throtl_update_limit_valid(struct throtl_data *td)
{
        struct cgroup_subsys_state *pos_css;
        struct blkcg_gq *blkg;
        bool low_valid = false;

        rcu_read_lock();
        blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
                struct throtl_grp *tg = blkg_to_tg(blkg);

                if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
                    tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) {
                        low_valid = true;
                        break;
                }
        }
        rcu_read_unlock();

        td->limit_valid[LIMIT_LOW] = low_valid;
}
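
/*
 * limit_valid[LIMIT_LOW] stays true only while at least one group still has
 * a .low limit configured; once the last one is cleared (see
 * throtl_pd_offline() below), the device is switched back to LIMIT_MAX.
 */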

static void throtl_upgrade_state(struct throtl_data *td);
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
        struct throtl_grp *tg = pd_to_tg(pd);

        tg->bps[READ][LIMIT_LOW] = 0;
        tg->bps[WRITE][LIMIT_LOW] = 0;
        tg->iops[READ][LIMIT_LOW] = 0;
        tg->iops[WRITE][LIMIT_LOW] = 0;

        blk_throtl_update_limit_valid(tg->td);

        if (!tg->td->limit_valid[tg->td->limit_index])
                throtl_upgrade_state(tg->td);
}

static void throtl_pd_free(struct blkg_policy_data *pd)
{
        struct throtl_grp *tg = pd_to_tg(pd);

        del_timer_sync(&tg->service_queue.pending_timer);
        blkg_rwstat_exit(&tg->stat_bytes);
        blkg_rwstat_exit(&tg->stat_ios);
        kfree(tg);
}

static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
        struct rb_node *n;

        n = rb_first_cached(&parent_sq->pending_tree);
        WARN_ON_ONCE(!n);
        if (!n)
                return NULL;
        return rb_entry_tg(n);
}

static void throtl_rb_erase(struct rb_node *n,
                            struct throtl_service_queue *parent_sq)
{
        rb_erase_cached(n, &parent_sq->pending_tree);
        RB_CLEAR_NODE(n);
        --parent_sq->nr_pending;
}

static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
{
        struct throtl_grp *tg;

        tg = throtl_rb_first(parent_sq);
        if (!tg)
                return;

        parent_sq->first_pending_disptime = tg->disptime;
}

static void tg_service_queue_add(struct throtl_grp *tg)
{
        struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
        struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;
        struct rb_node *parent = NULL;
        struct throtl_grp *__tg;
        unsigned long key = tg->disptime;
        bool leftmost = true;

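        /*
         * Walk down the pending_tree to find the insertion point, ordered
         * by disptime; track whether we only ever went left so the cached
         * leftmost node can be updated on insertion.
         */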
        while (*node != NULL) {
                parent = *node;
                __tg = rb_entry_tg(parent);

                if (time_before(key, __tg->disptime))
                        node = &parent->rb_left;
                else {
                        node = &parent->rb_right;
                        leftmost = false;
                }
        }

        rb_link_node(&tg->rb_node, parent, node);
        rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,
                               leftmost);
}

static void throtl_enqueue_tg(struct throtl_grp *tg)
{
        if (!(tg->flags & THROTL_TG_PENDING)) {
                tg_service_queue_add(tg);
                tg->flags |= THROTL_TG_PENDING;
                tg->service_queue.parent_sq->nr_pending++;
        }
}

static void throtl_dequeue_tg(struct throtl_grp *tg)
{
        if (tg->flags & THROTL_TG_PENDING) {
                throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
                tg->flags &= ~THROTL_TG_PENDING;
        }
}

/* Call with queue lock held */
static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
                                          unsigned long expires)
{
        unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;

        /*
         * Since we adjust the throttle limits dynamically, a sleep time
         * calculated from the previous limit might no longer be valid.
         * The cgroup's sleep time could be very long while no other cgroup
         * has IO running to notice the limit change, so cap the sleep so
         * that the change isn't missed for too long.
         */
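        /* e.g. with the default 100ms HD slice the delay is capped at ~800ms */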
        if (time_after(expires, max_expire))
                expires = max_expire;
        mod_timer(&sq->pending_timer, expires);
        throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
                   expires - jiffies, jiffies);
}

/**
 * throtl_schedule_next_dispatch - schedule the next dispatch cycle
 * @sq: the service_queue to schedule dispatch for
 * @force: force scheduling
 *
 * Arm @sq->pending_timer so that the next dispatch cycle starts on the
 * dispatch time of the first pending child. Returns %true if either timer
 * is armed or there's no pending child left. %false if the current
 * dispatch window is still open and the caller should continue
 * dispatching.
 *
 * If @force is %true, the dispatch timer is always scheduled and this
 * function is guaranteed to return %true. This is to be used when the
 * caller can't dispatch itself and needs to invoke pending_timer
 * unconditionally. Note that forced scheduling is likely to induce short
 * delay before dispatch starts even if @sq->first_pending_disptime is not
 * in the future and thus shouldn't be used in hot paths.
 */
static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
                                          bool force)
{
        /* any pending children left? */
        if (!sq->nr_pending)
                return true;

        update_min_dispatch_time(sq);

        /* is the next dispatch time in the future? */
        if (force || time_after(sq->first_pending_disptime, jiffies)) {
                throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
                return true;
        }

        /* tell the caller to continue dispatching */
        return false;
}

static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
                                                       bool rw, unsigned long start)
{
        tg->bytes_disp[rw] = 0;
        tg->io_disp[rw] = 0;

        atomic_set(&tg->io_split_cnt[rw], 0);

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * The previous slice has expired. We must have trimmed it after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * last bio dispatch, which means we never used any bandwidth since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * the start of the last slice. Do try to make use of that bandwidth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * while giving credit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) if (time_after_eq(start, tg->slice_start[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) tg->slice_start[rw] = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
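/*
 * Start a fresh slice: clear the dispatched byte/io counters and the
 * split-bio count, and anchor a new throtl_slice-long window at the
 * current time.
 */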
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) tg->bytes_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) tg->io_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) tg->slice_start[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) atomic_set(&tg->io_split_cnt[rw], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) "[%c] new slice start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809)
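/* slice_end is rounded up to a throtl_slice boundary whenever it is adjusted */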
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) unsigned long jiffy_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) unsigned long jiffy_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) throtl_set_slice_end(tg, rw, jiffy_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) "[%c] extend slice start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) /* Determine if previously allocated or extended slice is complete or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) /* Trim the used slices and adjust slice start accordingly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) unsigned long nr_slices, time_elapsed, io_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) u64 bytes_trim, tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * If bps is unlimited (-1), then the time slice doesn't get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * renewed. Don't try to trim the slice if the slice is used up. A
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * new slice will start when appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (throtl_slice_used(tg, rw))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) * A bio has been dispatched. Also adjust slice_end. It might happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) * that the cgroup limit was initially very low, resulting in a high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * slice_end, but the limit was later bumped up and the bio was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * dispatched sooner; in that case we need to reduce slice_end. A bogus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) * overly high slice_end is bad because it does not allow a new slice to start.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) time_elapsed = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) nr_slices = time_elapsed / tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if (!nr_slices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) return;
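/*
 * Compute how many bytes and IOs the configured limits would have
 * allowed during the fully elapsed slices; that allowance is trimmed
 * from the dispatched counters below.
 */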
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) bytes_trim = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (!bytes_trim && !io_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) if (tg->bytes_disp[rw] >= bytes_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) tg->bytes_disp[rw] -= bytes_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) tg->bytes_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) if (tg->io_disp[rw] >= io_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) tg->io_disp[rw] -= io_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) tg->io_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) tg->slice_start[rw], tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) u32 iops_limit, unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) unsigned int io_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if (iops_limit == UINT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) jiffy_elapsed = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) /* Round up to the next throttle slice, wait time must be nonzero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * jiffy_elapsed_rnd should not be a big value: the minimum iops can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * be 1, in which case jiffy_elapsed is at most the equivalent of 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * second, since we allow a dispatch after 1 second and the slice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * should have been trimmed by then.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
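/* IOs allowed by the limit over the rounded-up elapsed time */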
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) tmp = (u64)iops_limit * jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (tmp > UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) io_allowed = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) io_allowed = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (tg->io_disp[rw] + 1 <= io_allowed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) /* Calc approx time to dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) *wait = jiffy_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) u64 bps_limit, unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) u64 bytes_allowed, extra_bytes, tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) unsigned int bio_size = throtl_bio_data_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if (bps_limit == U64_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) /* Slice has just started. Consider one slice interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) if (!jiffy_elapsed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) jiffy_elapsed_rnd = tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
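/* bytes allowed by the limit over the rounded-up elapsed time */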
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) tmp = bps_limit * jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) bytes_allowed = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) /* Calc approx time to dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (!jiffy_wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) jiffy_wait = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) * This wait time does not take into account the rounding up we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * did above. Add that time as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) *wait = jiffy_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * Returns whether one can dispatch a bio or not. Also returns the approximate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * number of jiffies to wait before this bio is within the IO rate and can be dispatched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) u64 bps_limit = tg_bps_limit(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) u32 iops_limit = tg_iops_limit(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * Currently the whole state machine of the group depends on the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * bio queued in the group's bio list. So one should not be calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * this function with a different bio if there are other bios
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * queued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) BUG_ON(tg->service_queue.nr_queued[rw] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) /* If both the bps and iops limits are -1 (unlimited), there is nothing to throttle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * If the previous slice expired, start a new one; otherwise renew/extend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * the existing slice to make sure it is at least throtl_slice interval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * long from now. A new slice is started only for an empty throttle group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * If there is a queued bio, there should already be an active
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * slice and it should be extended instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) throtl_start_new_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (time_before(tg->slice_end[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) jiffies + tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) throtl_extend_slice(tg, rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) jiffies + tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
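/* fold IOs accounted to split bios into io_disp before the iops check */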
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (iops_limit != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) *wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) max_wait = max(bps_wait, iops_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) *wait = max_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (time_before(tg->slice_end[rw], jiffies + max_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) throtl_extend_slice(tg, rw, jiffies + max_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) unsigned int bio_size = throtl_bio_data_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) /* Charge the bio to the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) tg->bytes_disp[rw] += bio_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) tg->io_disp[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) tg->last_bytes_disp[rw] += bio_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) tg->last_io_disp[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * BIO_THROTTLED is used to prevent the same bio from being throttled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * more than once, as a throttled bio will go through blk-throtl a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * second time when it eventually gets issued. Set it when a bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * is being charged to a tg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (!bio_flagged(bio, BIO_THROTTLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) bio_set_flag(bio, BIO_THROTTLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * throtl_add_bio_tg - add a bio to the specified throtl_grp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * @bio: bio to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * @qn: qnode to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * @tg: the target throtl_grp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * Add @bio to @tg's service_queue using @qn. If @qn is not specified,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * tg->qnode_on_self[] is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (!qn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) qn = &tg->qnode_on_self[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) * If @tg doesn't currently have any bios queued in the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * direction, queueing @bio can change when @tg should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) * dispatched. Mark that @tg was empty. This is automatically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) * cleared on the next tg_update_disptime().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (!sq->nr_queued[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) tg->flags |= THROTL_TG_WAS_EMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) sq->nr_queued[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) throtl_enqueue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
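/*
 * Recompute @tg->disptime from the earliest time at which either the head
 * READ or head WRITE bio may be dispatched, and reposition @tg in its
 * parent's pending tree accordingly.
 */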
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) static void tg_update_disptime(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) bio = throtl_peek_queued(&sq->queued[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) if (bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) tg_may_dispatch(tg, bio, &read_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) bio = throtl_peek_queued(&sq->queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) tg_may_dispatch(tg, bio, &write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) min_wait = min(read_wait, write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) disptime = jiffies + min_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) /* Update dispatch time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) throtl_dequeue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) tg->disptime = disptime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) throtl_enqueue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) /* see throtl_add_bio_tg() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) tg->flags &= ~THROTL_TG_WAS_EMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
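/*
 * When a bio is promoted from @child_tg to @parent_tg and the parent's
 * slice has expired, start the parent's slice back at the child's
 * slice_start so the elapsed time counts as credit in the parent.
 */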
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct throtl_grp *parent_tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) if (throtl_slice_used(parent_tg, rw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) throtl_start_new_slice_with_credit(parent_tg, rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) child_tg->slice_start[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) struct throtl_service_queue *parent_sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) struct throtl_grp *tg_to_put = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * @bio is being transferred from @tg to @parent_sq. Popping a bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * from @tg may put its reference and @parent_sq might end up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * getting released prematurely. Remember the tg to put and put it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) * after @bio is transferred to @parent_sq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) sq->nr_queued[rw]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) throtl_charge_bio(tg, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * If our parent is another tg, we just need to transfer @bio to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * the parent using throtl_add_bio_tg(). If our parent is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * @td->service_queue, @bio is ready to be issued. Put it on its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * bio_lists[] and decrease total number queued. The caller is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * responsible for issuing these bios.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if (parent_tg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) start_parent_slice_with_credit(tg, parent_tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) &parent_sq->queued[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) BUG_ON(tg->td->nr_queued[rw] <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) tg->td->nr_queued[rw]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) throtl_trim_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) if (tg_to_put)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) blkg_put(tg_to_blkg(tg_to_put));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
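/*
 * Dispatch up to THROTL_GRP_QUANTUM bios from @tg to its parent
 * service_queue, biased roughly 75% reads / 25% writes. Returns the
 * number of bios dispatched.
 */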
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static int throtl_dispatch_tg(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) unsigned int nr_reads = 0, nr_writes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) /* Try to dispatch 75% READS and 25% WRITES */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) tg_dispatch_one_bio(tg, bio_data_dir(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) nr_reads++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) if (nr_reads >= max_nr_reads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) tg_dispatch_one_bio(tg, bio_data_dir(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) nr_writes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (nr_writes >= max_nr_writes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) return nr_reads + nr_writes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
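/*
 * Walk @parent_sq's pending tree in dispatch-time order and dispatch from
 * each group whose disptime has passed, stopping once at least
 * THROTL_QUANTUM bios have been moved. Groups that still have queued bios
 * are requeued with an updated disptime.
 */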
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) unsigned int nr_disp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) struct throtl_service_queue *sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (!parent_sq->nr_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) tg = throtl_rb_first(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (!tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) if (time_before(jiffies, tg->disptime))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) throtl_dequeue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) nr_disp += throtl_dispatch_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) if (sq->nr_queued[0] || sq->nr_queued[1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (nr_disp >= THROTL_QUANTUM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) return nr_disp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static bool throtl_can_upgrade(struct throtl_data *td,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) struct throtl_grp *this_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * throtl_pending_timer_fn - timer function for service_queue->pending_timer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) * @t: the pending_timer member of the throtl_service_queue being serviced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) * This timer is armed when a child throtl_grp with active bios becomes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) * pending and queued on the service_queue's pending_tree and expires when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) * the first child throtl_grp should be dispatched. This function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * dispatches bios from the children throtl_grps to the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * service_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * If the parent's parent is another throtl_grp, dispatching is propagated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) * by either arming its pending_timer or repeating dispatch directly. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * the top-level service_tree is reached, throtl_data->dispatch_work is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * kicked so that the ready bios are issued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) static void throtl_pending_timer_fn(struct timer_list *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct throtl_grp *tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct throtl_data *td = sq_to_td(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) struct request_queue *q = td->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) bool dispatched;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (throtl_can_upgrade(td, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) throtl_upgrade_state(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) parent_sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) dispatched = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) sq->nr_queued[READ] + sq->nr_queued[WRITE],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) sq->nr_queued[READ], sq->nr_queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) ret = throtl_select_dispatch(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) throtl_log(sq, "bios disp=%u", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) dispatched = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) if (throtl_schedule_next_dispatch(sq, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) /* this dispatch window is still open, relax and repeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) cpu_relax();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) if (!dispatched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) if (parent_sq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) /* @parent_sq is another throtl_grp, propagate dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (tg->flags & THROTL_TG_WAS_EMPTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (!throtl_schedule_next_dispatch(parent_sq, false)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* window is already open, repeat dispatching */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) sq = parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) /* reached the top-level, queue issuing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) queue_work(kthrotld_workqueue, &td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) * @work: work item being executed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * This function is queued for execution when bios reach the bio_lists[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * of throtl_data->service_queue. Those bios are ready and issued by this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) static void blk_throtl_dispatch_work_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) struct throtl_data *td = container_of(work, struct throtl_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) struct throtl_service_queue *td_sq = &td->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) struct request_queue *q = td->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) struct bio_list bio_list_on_stack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) bio_list_init(&bio_list_on_stack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) for (rw = READ; rw <= WRITE; rw++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) bio_list_add(&bio_list_on_stack, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (!bio_list_empty(&bio_list_on_stack)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) while ((bio = bio_list_pop(&bio_list_on_stack)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) submit_bio_noacct(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
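/*
 * seq_file helpers for the legacy throttle.* files: a stored limit of
 * U64_MAX/UINT_MAX means "unlimited" and produces no output line.
 */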
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) u64 v = *(u64 *)((void *)tg + off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) if (v == U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) return __blkg_prfill_u64(sf, pd, v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) unsigned int v = *(unsigned int *)((void *)tg + off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (v == UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) return __blkg_prfill_u64(sf, pd, v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static int tg_print_conf_u64(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) static int tg_print_conf_uint(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
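/*
 * Apply a limit change: refresh has_rules[] over the affected subtree,
 * clamp each child's idle threshold and latency target against its parent,
 * and restart the slices so previously dispatched IO isn't accounted
 * against the new limits.
 */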
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) static void tg_conf_updated(struct throtl_grp *tg, bool global)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) * Update has_rules[] flags for the updated tg's subtree. A tg is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * considered to have rules if either the tg itself or any of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * ancestors has rules. This identifies groups without any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) * restrictions in the whole hierarchy and allows them to bypass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * blk-throttle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) blkg_for_each_descendant_pre(blkg, pos_css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) struct throtl_grp *this_tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) struct throtl_grp *parent_tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) tg_update_has_rules(this_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) /* ignore root/second level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) !blkg->parent->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) parent_tg = blkg_to_tg(blkg->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) * make sure all children have a lower idle time threshold and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) * a higher latency target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) this_tg->idletime_threshold = min(this_tg->idletime_threshold,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) parent_tg->idletime_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) this_tg->latency_target = max(this_tg->latency_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) parent_tg->latency_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * We're already holding queue_lock and know @tg is valid. Let's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * apply the new config directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * Restart the slices for both READ and WRITE. It might happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) * that a group's limits are dropped suddenly and we don't want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * account recently dispatched IO against the new low rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) throtl_start_new_slice(tg, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) throtl_start_new_slice(tg, WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) if (tg->flags & THROTL_TG_PENDING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) throtl_schedule_next_dispatch(sq->parent_sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
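/*
 * Update a single legacy per-device limit. A value of 0 from userspace
 * means "no limit" and is stored internally as U64_MAX (or UINT_MAX for
 * the iops fields).
 */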
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) static ssize_t tg_set_conf(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) char *buf, size_t nbytes, loff_t off, bool is_u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) struct blkcg *blkcg = css_to_blkcg(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) struct blkg_conf_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) u64 v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) if (sscanf(ctx.body, "%llu", &v) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) if (!v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) v = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) tg = blkg_to_tg(ctx.blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) if (is_u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) *(u64 *)((void *)tg + of_cft(of)->private) = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) *(unsigned int *)((void *)tg + of_cft(of)->private) = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) tg_conf_updated(tg, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) blkg_conf_finish(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) return tg_set_conf(of, buf, nbytes, off, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) return tg_set_conf(of, buf, nbytes, off, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) static int tg_print_rwstat(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) blkg_prfill_rwstat, &blkcg_policy_throtl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) seq_cft(sf)->private, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) struct blkg_policy_data *pd, int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) struct blkg_rwstat_sample sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) &sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) return __blkg_prfill_rwstat(sf, pd, &sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) seq_cft(sf)->private, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) static struct cftype throtl_legacy_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) .name = "throttle.read_bps_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) .private = offsetof(struct throtl_grp, bps[READ][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) .seq_show = tg_print_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) .write = tg_set_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) .name = "throttle.write_bps_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) .private = offsetof(struct throtl_grp, bps[WRITE][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) .seq_show = tg_print_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) .write = tg_set_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) .name = "throttle.read_iops_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) .private = offsetof(struct throtl_grp, iops[READ][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) .seq_show = tg_print_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) .write = tg_set_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) .name = "throttle.write_iops_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) .private = offsetof(struct throtl_grp, iops[WRITE][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) .seq_show = tg_print_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) .write = tg_set_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) .name = "throttle.io_service_bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) .private = offsetof(struct throtl_grp, stat_bytes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) .seq_show = tg_print_rwstat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) .name = "throttle.io_service_bytes_recursive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) .private = offsetof(struct throtl_grp, stat_bytes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) .seq_show = tg_print_rwstat_recursive,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) .name = "throttle.io_serviced",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) .private = offsetof(struct throtl_grp, stat_ios),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) .seq_show = tg_print_rwstat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) .name = "throttle.io_serviced_recursive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) .private = offsetof(struct throtl_grp, stat_ios),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) .seq_show = tg_print_rwstat_recursive,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
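/*
 * Editor's note: tg_prfill_limit() prints one device line for the
 * cgroup v2 "low"/"max" files, e.g.:
 *   8:0 rbps=max wbps=1048576 riops=max wiops=max
 * (the device numbers are illustrative). The idle= and latency= fields
 * are only emitted for the low limit, and nothing is printed while the
 * group is still at its default settings.
 */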
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) const char *dname = blkg_dev_name(pd->blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) char bufs[4][21] = { "max", "max", "max", "max" };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) u64 bps_dft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) unsigned int iops_dft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) char idle_time[26] = "";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) char latency_time[26] = "";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) if (!dname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (off == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) bps_dft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) iops_dft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) bps_dft = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) iops_dft = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (tg->bps_conf[READ][off] == bps_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) tg->bps_conf[WRITE][off] == bps_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) tg->iops_conf[READ][off] == iops_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) tg->iops_conf[WRITE][off] == iops_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) (off != LIMIT_LOW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) tg->latency_target_conf == DFL_LATENCY_TARGET)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) if (tg->bps_conf[READ][off] != U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) snprintf(bufs[0], sizeof(bufs[0]), "%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) tg->bps_conf[READ][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) if (tg->bps_conf[WRITE][off] != U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) snprintf(bufs[1], sizeof(bufs[1]), "%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) tg->bps_conf[WRITE][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (tg->iops_conf[READ][off] != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) snprintf(bufs[2], sizeof(bufs[2]), "%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) tg->iops_conf[READ][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) if (tg->iops_conf[WRITE][off] != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) snprintf(bufs[3], sizeof(bufs[3]), "%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) tg->iops_conf[WRITE][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) if (off == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) if (tg->idletime_threshold_conf == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) strcpy(idle_time, " idle=max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) snprintf(idle_time, sizeof(idle_time), " idle=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) tg->idletime_threshold_conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) if (tg->latency_target_conf == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) strcpy(latency_time, " latency=max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) snprintf(latency_time, sizeof(latency_time),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) " latency=%lu", tg->latency_target_conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) latency_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static int tg_print_limit(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647)
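/*
 * Editor's note: tg_set_limit() parses a write to the "low"/"max"
 * files. blkg_conf_prep() consumes the leading MAJ:MIN device
 * specifier; what remains is a space-separated list of key=value
 * tokens (rbps, wbps, riops, wiops and, for the low limit only, idle
 * and latency), where a value may also be the literal "max".
 */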
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) static ssize_t tg_set_limit(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) struct blkcg *blkcg = css_to_blkcg(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) struct blkg_conf_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) u64 v[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) unsigned long idle_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) unsigned long latency_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) int index = of_cft(of)->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) tg = blkg_to_tg(ctx.blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) v[0] = tg->bps_conf[READ][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) v[1] = tg->bps_conf[WRITE][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) v[2] = tg->iops_conf[READ][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) v[3] = tg->iops_conf[WRITE][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) idle_time = tg->idletime_threshold_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) latency_time = tg->latency_target_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) char tok[27]; /* wiops=18446744073709551616 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) char *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) u64 val = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) if (tok[0] == '\0')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) ctx.body += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) p = tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) strsep(&p, "=");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) ret = -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) if (!val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) if (!strcmp(tok, "rbps") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) v[0] = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) else if (!strcmp(tok, "wbps") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) v[1] = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) else if (!strcmp(tok, "riops") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) v[2] = min_t(u64, val, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) else if (!strcmp(tok, "wiops") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) v[3] = min_t(u64, val, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) idle_time = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) latency_time = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) tg->bps_conf[READ][index] = v[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) tg->bps_conf[WRITE][index] = v[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) tg->iops_conf[READ][index] = v[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) tg->iops_conf[WRITE][index] = v[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) if (index == LIMIT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) tg->bps[READ][index] = v[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) tg->bps[WRITE][index] = v[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) tg->iops[READ][index] = v[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) tg->iops[WRITE][index] = v[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) tg->bps_conf[READ][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) tg->bps_conf[WRITE][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) tg->iops_conf[READ][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) tg->iops_conf[WRITE][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) tg->idletime_threshold_conf = idle_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) tg->latency_target_conf = latency_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) /* force the user to configure all settings for the low limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) tg->latency_target_conf == DFL_LATENCY_TARGET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) tg->bps[READ][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) tg->bps[WRITE][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) tg->iops[READ][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) tg->iops[WRITE][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) tg->idletime_threshold = DFL_IDLE_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) tg->latency_target = DFL_LATENCY_TARGET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) } else if (index == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) tg->idletime_threshold = tg->idletime_threshold_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) tg->latency_target = tg->latency_target_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) blk_throtl_update_limit_valid(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (tg->td->limit_valid[LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) if (index == LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) tg->td->limit_index = LIMIT_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) tg->td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) tg_conf_updated(tg, index == LIMIT_LOW &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) tg->td->limit_valid[LIMIT_LOW]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) blkg_conf_finish(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)
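/*
 * Editor's note: these are the cgroup v2 interface files; with the "io"
 * controller prefix they are visible as io.low (only when
 * CONFIG_BLK_DEV_THROTTLING_LOW is enabled) and io.max.
 */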
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) static struct cftype throtl_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) .name = "low",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) .seq_show = tg_print_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) .write = tg_set_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) .private = LIMIT_LOW,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) .name = "max",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) .seq_show = tg_print_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) .write = tg_set_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) .private = LIMIT_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) static void throtl_shutdown_wq(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) struct throtl_data *td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) cancel_work_sync(&td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) static struct blkcg_policy blkcg_policy_throtl = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) .dfl_cftypes = throtl_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) .legacy_cftypes = throtl_legacy_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) .pd_alloc_fn = throtl_pd_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) .pd_init_fn = throtl_pd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) .pd_online_fn = throtl_pd_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) .pd_offline_fn = throtl_pd_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) .pd_free_fn = throtl_pd_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) unsigned long rtime = jiffies, wtime = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) rtime = tg->last_low_overflow_time[READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) wtime = tg->last_low_overflow_time[WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) return min(rtime, wtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
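/*
 * Editor's note: tg_last_low_overflow_time() walks from @tg up to the
 * root and returns the most recent time that @tg, or any ancestor with
 * a low limit configured, exceeded that limit.
 */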
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) /* tg should not be an intermediate node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) static unsigned long tg_last_low_overflow_time(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) struct throtl_grp *parent = tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) unsigned long ret = __tg_last_low_overflow_time(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) parent_sq = parent->service_queue.parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) parent = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) if (!parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * If the parent has no low limit, it always reaches its low limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * so its overflow time is useless for its children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) if (!parent->bps[READ][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) !parent->iops[READ][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) !parent->bps[WRITE][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) !parent->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) if (time_after(__tg_last_low_overflow_time(parent), ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) ret = __tg_last_low_overflow_time(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static bool throtl_tg_is_idle(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * A cgroup is considered idle if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * - a single idle period is too long: longer than a fixed cap (in case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * the user configured an overly large threshold) or 4x the idletime threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) * - the average think time is above the idletime threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) * - IO latency is mostly below the latency target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) unsigned long time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) ret = tg->latency_target == DFL_LATENCY_TARGET ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) tg->avg_idletime > tg->idletime_threshold ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) (tg->latency_target && tg->bio_cnt &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) tg->bad_bio_cnt * 5 < tg->bio_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) tg->bio_cnt, ret, tg->td->scale);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) bool read_limit, write_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * If the cgroup has reached its low limit (a low limit of 0 is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) * considered reached), it is OK to upgrade to the next limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) if (!read_limit && !write_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) if (read_limit && sq->nr_queued[READ] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) (!write_limit || sq->nr_queued[WRITE]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) if (write_limit && sq->nr_queued[WRITE] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) (!read_limit || sq->nr_queued[READ]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) if (time_after_eq(jiffies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) throtl_tg_is_idle(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) if (throtl_tg_can_upgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) tg = sq_to_tg(tg->service_queue.parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) if (!tg || !tg_to_blkg(tg)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
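/*
 * Editor's note: upgrading out of LIMIT_LOW is a device-wide decision.
 * Every leaf group (one whose blkcg has no child cgroups), other than
 * the group that just hit its limit, must itself be eligible to
 * upgrade before the whole device is switched.
 */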
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) static bool throtl_can_upgrade(struct throtl_data *td,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) struct throtl_grp *this_tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (td->limit_index != LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) if (tg == this_tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) if (!throtl_hierarchy_can_upgrade(tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) static void throtl_upgrade_check(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) if (tg->td->limit_index != LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) tg->last_check_time = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (!time_after_eq(now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (throtl_can_upgrade(tg->td, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) throtl_upgrade_state(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
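/*
 * Editor's note: throtl_upgrade_state() switches the whole device to
 * LIMIT_MAX: it resets the downgrade scale, records the upgrade time
 * and kicks dispatch for every group so that already-queued bios are
 * re-evaluated against the max limits.
 */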
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) static void throtl_upgrade_state(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) throtl_log(&td->service_queue, "upgrade to max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) td->low_upgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) td->scale = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) tg->disptime = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) throtl_select_dispatch(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) throtl_schedule_next_dispatch(sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) throtl_select_dispatch(&td->service_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) throtl_schedule_next_dispatch(&td->service_queue, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) queue_work(kthrotld_workqueue, &td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
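/*
 * Editor's note: downgrading is gradual. Each call halves td->scale
 * and, while the scale is still non-zero, only rewinds low_upgrade_time
 * instead of switching; the actual switch back to LIMIT_LOW happens
 * once the scale reaches zero.
 */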
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) static void throtl_downgrade_state(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) td->scale /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) if (td->scale) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) td->limit_index = LIMIT_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) td->low_downgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) static bool throtl_tg_can_downgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) struct throtl_data *td = tg->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) * If the cgroup has stayed below its low limit for a while, consider a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) * downgrade so that the other cgroups get throttled again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) time_after_eq(now, tg_last_low_overflow_time(tg) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) (!throtl_tg_is_idle(tg) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) static bool throtl_hierarchy_can_downgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) if (!throtl_tg_can_downgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) tg = sq_to_tg(tg->service_queue.parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) if (!tg || !tg_to_blkg(tg)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
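/*
 * Editor's note: throtl_downgrade_check() runs on the LIMIT_MAX ->
 * LIMIT_LOW path. At most once per throtl slice it computes the
 * dispatch rate over the elapsed window, refreshes
 * last_low_overflow_time for any direction that met its low limit and
 * downgrades the device if the whole hierarchy can be downgraded.
 */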
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) static void throtl_downgrade_check(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) uint64_t bps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) unsigned int iops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) unsigned long elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) if (tg->td->limit_index != LIMIT_MAX ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) !tg->td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) elapsed_time = now - tg->last_check_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) tg->last_check_time = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) if (time_before(now, tg_last_low_overflow_time(tg) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) if (tg->bps[READ][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) bps = tg->last_bytes_disp[READ] * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) do_div(bps, elapsed_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) if (bps >= tg->bps[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) tg->last_low_overflow_time[READ] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (tg->bps[WRITE][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) bps = tg->last_bytes_disp[WRITE] * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) do_div(bps, elapsed_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) if (bps >= tg->bps[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) tg->last_low_overflow_time[WRITE] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) if (tg->iops[READ][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) iops = tg->last_io_disp[READ] * HZ / elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (iops >= tg->iops[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) tg->last_low_overflow_time[READ] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) if (tg->iops[WRITE][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) if (iops >= tg->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) tg->last_low_overflow_time[WRITE] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) * If this cgroup (and its ancestors) stayed below their low limits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) * downgrade so that every cgroup is throttled to its low limit again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) if (throtl_hierarchy_can_downgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) throtl_downgrade_state(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) tg->last_bytes_disp[READ] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) tg->last_bytes_disp[WRITE] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) tg->last_io_disp[READ] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) tg->last_io_disp[WRITE] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) static void blk_throtl_update_idletime(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) unsigned long now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) unsigned long last_finish_time = tg->last_finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) if (last_finish_time == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) now = ktime_get_ns() >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) if (now <= last_finish_time ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) last_finish_time == tg->checked_last_finish_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098)
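/*
 * Editor's note: this is an exponentially weighted moving average with
 * 7/8 weight on history: avg = (7 * avg + sample) / 8. Times are
 * ktime_get_ns() >> 10, i.e. roughly microseconds.
 */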
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) tg->checked_last_finish_time = last_finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
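/*
 * Editor's note: at most once per second, throtl_update_latency_buckets()
 * folds the per-cpu latency samples into per-size-bucket averages
 * (the same 7/8 EWMA as the idletime tracking) that serve as the
 * expected baseline latency; bucket latencies are clamped so they never
 * decrease as the request-size bucket grows.
 */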
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) static void throtl_update_latency_buckets(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) int i, cpu, rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) unsigned long last_latency[2] = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) unsigned long latency[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) if (time_before(jiffies, td->last_calculate_time + HZ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) td->last_calculate_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) memset(avg_latency, 0, sizeof(avg_latency));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) for (rw = READ; rw <= WRITE; rw++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) struct latency_bucket *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) /* this isn't race free, but ok in practice */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) bucket = per_cpu_ptr(td->latency_buckets[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) tmp->total_latency += bucket[i].total_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) tmp->samples += bucket[i].samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) bucket[i].total_latency = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) bucket[i].samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) if (tmp->samples >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) int samples = tmp->samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) latency[rw] = tmp->total_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) tmp->total_latency = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) tmp->samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) latency[rw] /= samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) if (latency[rw] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) avg_latency[rw][i].latency = latency[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) for (rw = READ; rw <= WRITE; rw++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) if (!avg_latency[rw][i].latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) if (td->avg_buckets[rw][i].latency < last_latency[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) td->avg_buckets[rw][i].latency =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) last_latency[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (!td->avg_buckets[rw][i].valid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) latency[rw] = avg_latency[rw][i].latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) avg_latency[rw][i].latency) >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) td->avg_buckets[rw][i].latency = max(latency[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) last_latency[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) td->avg_buckets[rw][i].valid = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) last_latency[rw] = td->avg_buckets[rw][i].latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) throtl_log(&td->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) "Latency bucket %d: read latency=%ld, read valid=%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) "write latency=%ld, write valid=%d", i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) td->avg_buckets[READ][i].latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) td->avg_buckets[READ][i].valid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) td->avg_buckets[WRITE][i].latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) td->avg_buckets[WRITE][i].valid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) static inline void throtl_update_latency_buckets(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
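/*
 * Editor's note: a bio that gets split counts as an extra IO for iops
 * accounting. Walk up from the bio's group and bump the split counters
 * of every level that has throttling rules for this direction.
 */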
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) void blk_throtl_charge_bio_split(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) struct throtl_grp *parent = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) if (!parent->has_rules[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) atomic_inc(&parent->io_split_cnt[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) atomic_inc(&parent->last_io_split_cnt[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) parent_sq = parent->service_queue.parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) parent = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) } while (parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
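/*
 * Editor's note: main throttling entry point. Starting at the bio's own
 * group, the bio is charged and climbs one level per iteration while it
 * stays within the limits; at the first level that is over its limit
 * (or that already has bios queued in this direction) it is queued and
 * the function returns true so the caller does not issue it now.
 */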
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) bool blk_throtl_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) struct request_queue *q = bio->bi_disk->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) struct throtl_qnode *qn = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) struct throtl_service_queue *sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) bool throttled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) struct throtl_data *td = tg->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) /* see throtl_charge_bio() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) if (bio_flagged(bio, BIO_THROTTLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) bio->bi_iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (!tg->has_rules[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) throtl_update_latency_buckets(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) blk_throtl_update_idletime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (tg->last_low_overflow_time[rw] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) throtl_downgrade_check(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) throtl_upgrade_check(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) /* throtl is FIFO - if bios are already queued, this one must queue too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) if (sq->nr_queued[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) /* if above limits, break to queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) if (!tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) if (throtl_can_upgrade(td, tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) throtl_upgrade_state(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) /* within limits, let's charge and dispatch directly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) throtl_charge_bio(tg, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) * We need to trim the slice even when bios are not being queued,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) * otherwise it might happen that a bio is not queued for a long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * time and the slice keeps on extending while trim is never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * called. If the limits are then reduced suddenly, we would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * account all the IO dispatched so far against the new low
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * rate, and newly queued IO would get a really long dispatch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) * So keep on trimming the slice even if the bio is not queued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) throtl_trim_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) * @bio passed through this layer without being throttled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) * Climb up the ladder. If we're already at the top, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) * can be executed directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) qn = &tg->qnode_on_parent[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (!tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) /* out-of-limit, queue to @tg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) rw == READ ? 'R' : 'W',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) tg->bytes_disp[rw], bio->bi_iter.bi_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) tg_bps_limit(tg, rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) tg->io_disp[rw], tg_iops_limit(tg, rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) sq->nr_queued[READ], sq->nr_queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) td->nr_queued[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) throtl_add_bio_tg(bio, qn, tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) throttled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * Update @tg's dispatch time and force schedule dispatch if @tg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) * was empty before @bio. The forced scheduling isn't likely to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) * cause undue delay as @bio is likely to be dispatched directly if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) * its @tg's disptime is not in the future.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (tg->flags & THROTL_TG_WAS_EMPTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) bio_set_flag(bio, BIO_THROTTLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) if (throttled || !td->track_bio_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) return throttled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) static void throtl_track_latency(struct throtl_data *td, sector_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) int op, unsigned long time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) struct latency_bucket *latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) if (!td || td->limit_index != LIMIT_LOW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) !blk_queue_nonrot(td->queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) index = request_bucket_index(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) latency = get_cpu_ptr(td->latency_buckets[op]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) latency[index].total_latency += time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) latency[index].samples++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) put_cpu_ptr(td->latency_buckets[op]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
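/*
 * Completion hook for request-based drivers: feed the request's size and
 * latency (nanoseconds scaled down to roughly microseconds via >> 10) into
 * the latency buckets.
 */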
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) void blk_throtl_stat_add(struct request *rq, u64 time_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) struct request_queue *q = rq->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) struct throtl_data *td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) time_ns >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)
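/*
 * Bio completion hook for the low-limit heuristics: record the group's last
 * finish time, feed the latency buckets (bio-based drivers only), and count
 * bios that exceeded the group's latency target so the bad/total ratio can
 * feed the upgrade/downgrade decisions.
 */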
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) void blk_throtl_bio_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) u64 finish_time_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) unsigned long finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) unsigned long start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) unsigned long lat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) int rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (!blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) if (!tg->td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) finish_time_ns = ktime_get_ns();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) tg->last_finish_time = finish_time_ns >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) start_time = bio_issue_time(&bio->bi_issue) >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) finish_time = __bio_issue_time(finish_time_ns) >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) if (!start_time || finish_time <= start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) lat = finish_time - start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	/* latency is only tracked here for bio-based drivers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) bio_op(bio), lat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)
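	/*
	 * A bio is counted as "bad" if its latency exceeds the running
	 * average for IOs of the same size plus the group's configured
	 * latency target.  Latencies below td->filtered_latency are too
	 * small to be meaningful and are ignored.
	 */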
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) if (tg->latency_target && lat >= tg->td->filtered_latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) unsigned int threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) threshold = tg->td->avg_buckets[rw][bucket].latency +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) tg->latency_target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) if (lat > threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) tg->bad_bio_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 		 * This is not race free, so the counts may be slightly off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 		 * at worst the cgroup is throttled a bit more or less than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 		 * intended.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) tg->bio_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
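	/*
	 * Periodically (every throtl_slice, or once more than 1024 bios have
	 * been seen) halve both counters so the bad/total ratio tracks recent
	 * behaviour rather than the whole history.
	 */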
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) tg->bio_cnt /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) tg->bad_bio_cnt /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
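/*
 * Set up throttling for a request queue: allocate the per-queue throtl_data
 * and its per-cpu latency buckets, initialize the top-level service queue
 * and dispatch work, and activate the throttle blkcg policy.
 */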
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) int blk_throtl_init(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) struct throtl_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) if (!td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) LATENCY_BUCKET_SIZE, __alignof__(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) if (!td->latency_buckets[READ]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) LATENCY_BUCKET_SIZE, __alignof__(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) if (!td->latency_buckets[WRITE]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) free_percpu(td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) throtl_service_queue_init(&td->service_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) q->td = td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) td->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
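	/*
	 * The max limit is always valid; LIMIT_LOW only becomes active once
	 * a low limit is configured through the cgroup interface.
	 */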
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) td->limit_valid[LIMIT_MAX] = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) td->low_upgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) td->low_downgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) /* activate policy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) free_percpu(td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) free_percpu(td->latency_buckets[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)
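/*
 * Tear down throttling for a queue: stop the pending-dispatch timer and the
 * dispatch work, deactivate the policy, and free the per-cpu latency buckets
 * and the throtl_data itself.
 */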
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) void blk_throtl_exit(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) BUG_ON(!q->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) del_timer_sync(&q->td->service_queue.pending_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) throtl_shutdown_wq(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) blkcg_deactivate_policy(q, &blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) free_percpu(q->td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) free_percpu(q->td->latency_buckets[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) kfree(q->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)
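/*
 * Called at queue registration time, once the device type is known: choose
 * the default throttle slice and latency filtering based on whether the
 * device is rotational, and decide how bio latency will be tracked.
 */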
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) void blk_throtl_register_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) struct throtl_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) BUG_ON(!td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470)
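	/*
	 * Non-rotational devices get a short slice and no latency filtering;
	 * rotational disks get a longer slice, ignore latencies below 1ms,
	 * and start each bucket from the 4ms baseline latency.
	 */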
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) if (blk_queue_nonrot(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) td->throtl_slice = DFL_THROTL_SLICE_SSD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) td->filtered_latency = LATENCY_FILTERED_SSD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) td->throtl_slice = DFL_THROTL_SLICE_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) td->filtered_latency = LATENCY_FILTERED_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) /* if no low limit, use previous default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) td->throtl_slice = DFL_THROTL_SLICE_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
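	/*
	 * blk-mq devices report completion latency through request stats
	 * (blk_throtl_stat_add()); bio-based drivers have no requests, so
	 * their latency is sampled at bio completion instead.
	 */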
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) td->track_bio_latency = !queue_is_mq(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) if (!td->track_bio_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) blk_stat_enable_accounting(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
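/*
 * sysfs interface for the throttle slice ("sample time"): reported and set
 * in milliseconds, rejecting zero and anything above MAX_THROTL_SLICE.
 */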
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) if (!q->td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) ssize_t blk_throtl_sample_time_store(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) const char *page, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) unsigned long v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) unsigned long t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) if (!q->td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) if (kstrtoul(page, 10, &v))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) t = msecs_to_jiffies(v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) if (t == 0 || t > MAX_THROTL_SLICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) q->td->throtl_slice = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517)
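/*
 * Module init: create the kthrotld workqueue used for delayed dispatch and
 * register the throttle blkcg policy.
 */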
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) static int __init throtl_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) if (!kthrotld_workqueue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) panic("Failed to create kthrotld\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) return blkcg_policy_register(&blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) module_init(throtl_init);