Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/*
 * Interface for controlling IO bandwidth on a request queue
 *
 * Copyright (C) 2010 Vivek Goyal <vgoyal@redhat.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"

/* Max dispatch from a group in 1 round */
#define THROTL_GRP_QUANTUM 8

/* Total max dispatch from all groups in one round */
#define THROTL_QUANTUM 32

/* Throttling is performed over a slice and after that slice is renewed */
#define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50)
#define MAX_THROTL_SLICE (HZ)
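/*
 * In wall-clock terms, DFL_THROTL_SLICE_HD (HZ / 10) is a 100ms slice,
 * DFL_THROTL_SLICE_SSD (HZ / 50) is 20ms, and MAX_THROTL_SLICE caps a slice
 * at one second.
 */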
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define DFL_HD_BASELINE_LATENCY (4000L) /* 4ms */
#define LATENCY_FILTERED_SSD (0)
/*
 * For HD, very small latencies come from sequential IO. Such IO tells us
 * nothing about whether it is being impacted by other IO, hence we ignore it.
 */
#define LATENCY_FILTERED_HD (1000L) /* 1ms */

static struct blkcg_policy blkcg_policy_throtl;

/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once fills up
 * the list and starves the others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from.  When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's.  A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
};

#define rb_entry_tg(node)	rb_entry((node), struct throtl_grp, rb_node)

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when the group
	 * will be unthrottled and ready to dispatch more bios. It is used as
	 * the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	atomic_t io_split_cnt[2];
	atomic_t last_io_split_cnt[2];

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

/* We measure latency for request size from <= 4k to >= 1M */
#define LATENCY_BUCKET_SIZE 9

struct latency_bucket {
	unsigned long total_latency; /* ns / 1024 */
	int samples;
};

struct avg_latency_bucket {
	unsigned long latency; /* ns / 1024 */
	bool valid;
};

struct throtl_data
{
	/* service tree for active throtl groups */
	struct throtl_service_queue service_queue;

	struct request_queue *queue;

	/* Total Number of queued bios on READ and WRITE lists */
	unsigned int nr_queued[2];

	unsigned int throtl_slice;

	/* Work for dispatching throttled bios */
	struct work_struct dispatch_work;
	unsigned int limit_index;
	bool limit_valid[LIMIT_CNT];

	unsigned long low_upgrade_time;
	unsigned long low_downgrade_time;

	unsigned int scale;

	struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE];
	struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE];
	struct latency_bucket __percpu *latency_buckets[2];
	unsigned long last_calculate_time;
	unsigned long filtered_latency;

	bool track_bio_latency;
};

static void throtl_pending_timer_fn(struct timer_list *t);

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
{
	return pd_to_blkg(&tg->pd);
}

/**
 * sq_to_tg - return the throtl_grp the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * Return the throtl_grp @sq belongs to.  If @sq is the top-level one
 * embedded in throtl_data, %NULL is returned.
 */
static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq)
{
	if (sq && sq->parent_sq)
		return container_of(sq, struct throtl_grp, service_queue);
	else
		return NULL;
}

/**
 * sq_to_td - return throtl_data the specified service queue belongs to
 * @sq: the throtl_service_queue of interest
 *
 * A service_queue can be embedded in either a throtl_grp or throtl_data.
 * Determine the associated throtl_data accordingly and return it.
 */
static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
{
	struct throtl_grp *tg = sq_to_tg(sq);

	if (tg)
		return tg->td;
	else
		return container_of(sq, struct throtl_data, service_queue);
}

/*
 * A cgroup's LIMIT_MAX limit is scaled if a low limit is set, to make IO
 * dispatch smoother.
 * Scale up: scale up linearly according to the time elapsed since the last
 *           upgrade. For every throtl_slice, the limit grows by 1/2 of the
 *           .low limit until it hits the .max limit.
 * Scale down: scale down exponentially if a cgroup doesn't hit its .low limit.
 */
static uint64_t throtl_adjusted_limit(uint64_t low, struct throtl_data *td)
{
	/* arbitrary value to avoid too big scale */
	if (td->scale < 4096 && time_after_eq(jiffies,
	    td->low_upgrade_time + td->scale * td->throtl_slice))
		td->scale = (jiffies - td->low_upgrade_time) / td->throtl_slice;

	return low + (low >> 1) * td->scale;
}

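/*
 * Worked example for throtl_adjusted_limit(): with a .low limit of 100MB/s
 * and scale == 2 (two throtl_slice periods since the last upgrade), the
 * adjusted limit is 100 + (100 >> 1) * 2 = 200MB/s; the callers below then
 * cap it at the configured .max limit.
 */

/*
 * tg_bps_limit() returns the effective bytes-per-second limit for @tg in
 * direction @rw under the currently active limit index.  The root group on
 * the default hierarchy is never limited.  While running at LIMIT_LOW, a
 * group without a low bps limit stays unlimited if it is an intermediate
 * node or has a low iops limit, and is otherwise floored at MIN_THROTL_BPS.
 * While at LIMIT_MAX with a low limit configured, the limit is scaled up via
 * throtl_adjusted_limit() but never exceeds the configured max.
 */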
static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
{
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	uint64_t ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return U64_MAX;

	td = tg->td;
	ret = tg->bps[rw][td->limit_index];
	if (ret == 0 && td->limit_index == LIMIT_LOW) {
		/* intermediate node or iops isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->iops[rw][td->limit_index])
			return U64_MAX;
		else
			return MIN_THROTL_BPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
	    tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->bps[rw][LIMIT_LOW], td);
		ret = min(tg->bps[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}

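/*
 * IOPS counterpart of tg_bps_limit(), applying the same LIMIT_LOW/LIMIT_MAX
 * rules with UINT_MAX as the "unlimited" value.
 */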
static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
{
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td;
	unsigned int ret;

	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
		return UINT_MAX;

	td = tg->td;
	ret = tg->iops[rw][td->limit_index];
	if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
		/* intermediate node or bps isn't 0 */
		if (!list_empty(&blkg->blkcg->css.children) ||
		    tg->bps[rw][td->limit_index])
			return UINT_MAX;
		else
			return MIN_THROTL_IOPS;
	}

	if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
	    tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
		uint64_t adjusted;

		adjusted = throtl_adjusted_limit(tg->iops[rw][LIMIT_LOW], td);
		if (adjusted > UINT_MAX)
			adjusted = UINT_MAX;
		ret = min_t(unsigned int, tg->iops[rw][LIMIT_MAX], adjusted);
	}
	return ret;
}

#define request_bucket_index(sectors) \
	clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
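/*
 * Example: an 8-sector (4KB) request maps to bucket 0 (order_base_2(8) - 3),
 * while anything of 2048 sectors (1MB) or more is clamped to the last
 * bucket, matching the "<= 4k to >= 1M" range noted above.
 */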

/**
 * throtl_log - log debug message via blktrace
 * @sq: the service_queue being reported
 * @fmt: printf format string
 * @args: printf args
 *
 * The messages are prefixed with "throtl BLKG_NAME" if @sq belongs to a
 * throtl_grp; otherwise, just "throtl".
 */
#define throtl_log(sq, fmt, args...)	do {				\
	struct throtl_grp *__tg = sq_to_tg((sq));			\
	struct throtl_data *__td = sq_to_td((sq));			\
									\
	(void)__td;							\
	if (likely(!blk_trace_note_message_enabled(__td->queue)))	\
		break;							\
	if ((__tg)) {							\
		blk_add_cgroup_trace_msg(__td->queue,			\
			tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
	} else {							\
		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
	}								\
} while (0)
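
/*
 * Usage example (illustrative): throtl_log(&tg->service_queue, "upgrade to max")
 * adds a blktrace message prefixed with "throtl" and the group's blkg name,
 * and does nothing at run time unless blktrace message notes are enabled on
 * the queue.
 */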

static inline unsigned int throtl_bio_data_size(struct bio *bio)
{
	/* assume it's one sector */
	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
		return 512;
	return bio->bi_iter.bi_size;
}

static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
	INIT_LIST_HEAD(&qn->node);
	bio_list_init(&qn->bios);
	qn->tg = tg;
}

/**
 * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
 * @bio: bio being added
 * @qn: qnode to add bio to
 * @queued: the service_queue->queued[] list @qn belongs to
 *
 * Add @bio to @qn and put @qn on @queued if it's not already on.
 * @qn->tg's reference count is bumped when @qn is activated.  See the
 * comment on top of throtl_qnode definition for details.
 */
static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
				 struct list_head *queued)
{
	bio_list_add(&qn->bios, bio);
	if (list_empty(&qn->node)) {
		list_add_tail(&qn->node, queued);
		blkg_get(tg_to_blkg(qn->tg));
	}
}

/**
 * throtl_peek_queued - peek the first bio on a qnode list
 * @queued: the qnode list to peek
 */
static struct bio *throtl_peek_queued(struct list_head *queued)
{
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
	bio = bio_list_peek(&qn->bios);
	WARN_ON_ONCE(!bio);
	return bio;
}

/**
 * throtl_pop_queued - pop the first bio from a qnode list
 * @queued: the qnode list to pop a bio from
 * @tg_to_put: optional out argument for throtl_grp to put
 *
 * Pop the first bio from the qnode list @queued.  After popping, the first
 * qnode is removed from @queued if empty or moved to the end of @queued so
 * that the popping order is round-robin.
 *
 * When the first qnode is removed, its associated throtl_grp should be put
 * too.  If @tg_to_put is NULL, this function automatically puts it;
 * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
 * responsible for putting it.
 */
static struct bio *throtl_pop_queued(struct list_head *queued,
				     struct throtl_grp **tg_to_put)
{
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
	bio = bio_list_pop(&qn->bios);
	WARN_ON_ONCE(!bio);

	if (bio_list_empty(&qn->bios)) {
		list_del_init(&qn->node);
		if (tg_to_put)
			*tg_to_put = qn->tg;
		else
			blkg_put(tg_to_blkg(qn->tg));
	} else {
		list_move_tail(&qn->node, queued);
	}

	return bio;
}

/* init a service_queue, assumes the caller zeroed it */
static void throtl_service_queue_init(struct throtl_service_queue *sq)
{
	INIT_LIST_HEAD(&sq->queued[0]);
	INIT_LIST_HEAD(&sq->queued[1]);
	sq->pending_tree = RB_ROOT_CACHED;
	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}

static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
						struct request_queue *q,
						struct blkcg *blkcg)
{
	struct throtl_grp *tg;
	int rw;

	tg = kzalloc_node(sizeof(*tg), gfp, q->node);
	if (!tg)
		return NULL;

	if (blkg_rwstat_init(&tg->stat_bytes, gfp))
		goto err_free_tg;

	if (blkg_rwstat_init(&tg->stat_ios, gfp))
		goto err_exit_stat_bytes;

	throtl_service_queue_init(&tg->service_queue);

	for (rw = READ; rw <= WRITE; rw++) {
		throtl_qnode_init(&tg->qnode_on_self[rw], tg);
		throtl_qnode_init(&tg->qnode_on_parent[rw], tg);
	}

	RB_CLEAR_NODE(&tg->rb_node);
	tg->bps[READ][LIMIT_MAX] = U64_MAX;
	tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
	tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
	tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
	tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
	tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
	/* LIMIT_LOW will have default value 0 */

	tg->latency_target = DFL_LATENCY_TARGET;
	tg->latency_target_conf = DFL_LATENCY_TARGET;
	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;

	return &tg->pd;

err_exit_stat_bytes:
	blkg_rwstat_exit(&tg->stat_bytes);
err_free_tg:
	kfree(tg);
	return NULL;
}

static void throtl_pd_init(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	struct blkcg_gq *blkg = tg_to_blkg(tg);
	struct throtl_data *td = blkg->q->td;
	struct throtl_service_queue *sq = &tg->service_queue;

	/*
	 * If on the default hierarchy, we switch to properly hierarchical
	 * behavior where limits on a given throtl_grp are applied to the
	 * whole subtree rather than just the group itself.  e.g. If 16M
	 * read_bps limit is set on the root group, the whole system can't
	 * exceed 16M for the device.
	 *
	 * If not on the default hierarchy, the broken flat hierarchy
	 * behavior is retained where all throtl_grps are treated as if
	 * they're all separate root groups right below throtl_data.
	 * Limits of a group don't interact with limits of other groups
	 * regardless of the position of the group in the hierarchy.
	 */
	sq->parent_sq = &td->service_queue;
	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
	tg->td = td;
}

/*
 * Set has_rules[] if @tg or any of its parents have limits configured.
 * This doesn't require walking up to the top of the hierarchy as the
 * parent's has_rules[] is guaranteed to be correct.
 */
static void tg_update_has_rules(struct throtl_grp *tg)
{
	struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
	struct throtl_data *td = tg->td;
	int rw;

	for (rw = READ; rw <= WRITE; rw++)
		tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
			(td->limit_valid[td->limit_index] &&
			 (tg_bps_limit(tg, rw) != U64_MAX ||
			  tg_iops_limit(tg, rw) != UINT_MAX));
}

static void throtl_pd_online(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);
	/*
	 * We don't want new groups to escape the limits of their ancestors.
	 * Update has_rules[] after a new group is brought online.
	 */
	tg_update_has_rules(tg);
}

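/*
 * Scan all groups on the queue and record in td->limit_valid[LIMIT_LOW]
 * whether any of them still has a low bps or iops limit configured.
 */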
static void blk_throtl_update_limit_valid(struct throtl_data *td)
{
	struct cgroup_subsys_state *pos_css;
	struct blkcg_gq *blkg;
	bool low_valid = false;

	rcu_read_lock();
	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
		struct throtl_grp *tg = blkg_to_tg(blkg);

		if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
		    tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) {
			low_valid = true;
			break;
		}
	}
	rcu_read_unlock();

	td->limit_valid[LIMIT_LOW] = low_valid;
}

static void throtl_upgrade_state(struct throtl_data *td);
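/*
 * A group going offline loses its low limits; if the currently active low
 * limit setting is no longer valid for the queue as a whole, upgrade back to
 * LIMIT_MAX.
 */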
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	tg->bps[READ][LIMIT_LOW] = 0;
	tg->bps[WRITE][LIMIT_LOW] = 0;
	tg->iops[READ][LIMIT_LOW] = 0;
	tg->iops[WRITE][LIMIT_LOW] = 0;

	blk_throtl_update_limit_valid(tg->td);

	if (!tg->td->limit_valid[tg->td->limit_index])
		throtl_upgrade_state(tg->td);
}

static void throtl_pd_free(struct blkg_policy_data *pd)
{
	struct throtl_grp *tg = pd_to_tg(pd);

	del_timer_sync(&tg->service_queue.pending_timer);
	blkg_rwstat_exit(&tg->stat_bytes);
	blkg_rwstat_exit(&tg->stat_ios);
	kfree(tg);
}

static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
	struct rb_node *n;

	n = rb_first_cached(&parent_sq->pending_tree);
	WARN_ON_ONCE(!n);
	if (!n)
		return NULL;
	return rb_entry_tg(n);
}

static void throtl_rb_erase(struct rb_node *n,
			    struct throtl_service_queue *parent_sq)
{
	rb_erase_cached(n, &parent_sq->pending_tree);
	RB_CLEAR_NODE(n);
	--parent_sq->nr_pending;
}

static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
{
	struct throtl_grp *tg;

	tg = throtl_rb_first(parent_sq);
	if (!tg)
		return;

	parent_sq->first_pending_disptime = tg->disptime;
}

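/*
 * Insert @tg into its parent's pending RB tree, keyed by ->disptime.  The
 * leftmost (earliest dispatch time) node is kept cached so that
 * throtl_rb_first() can find the next group to dispatch cheaply.
 */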
static void tg_service_queue_add(struct throtl_grp *tg)
{
	struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
	struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct throtl_grp *__tg;
	unsigned long key = tg->disptime;
	bool leftmost = true;

	while (*node != NULL) {
		parent = *node;
		__tg = rb_entry_tg(parent);

		if (time_before(key, __tg->disptime))
			node = &parent->rb_left;
		else {
			node = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&tg->rb_node, parent, node);
	rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,
			       leftmost);
}

static void throtl_enqueue_tg(struct throtl_grp *tg)
{
	if (!(tg->flags & THROTL_TG_PENDING)) {
		tg_service_queue_add(tg);
		tg->flags |= THROTL_TG_PENDING;
		tg->service_queue.parent_sq->nr_pending++;
	}
}

static void throtl_dequeue_tg(struct throtl_grp *tg)
{
	if (tg->flags & THROTL_TG_PENDING) {
		throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
		tg->flags &= ~THROTL_TG_PENDING;
	}
}

/* Call with queue lock held */
static void throtl_schedule_pending_timer(struct throtl_service_queue *sq,
					  unsigned long expires)
{
	unsigned long max_expire = jiffies + 8 * sq_to_td(sq)->throtl_slice;

	/*
	 * Since we are adjusting the throttle limit dynamically, the sleep
	 * time calculated according to the previous limit might be invalid.
	 * The cgroup could end up sleeping for a very long time while no
	 * other cgroup has IO running to notice the limit change. Make sure
	 * the cgroup doesn't sleep too long so the notification isn't missed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	if (time_after(expires, max_expire))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 		expires = max_expire;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	mod_timer(&sq->pending_timer, expires);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	throtl_log(sq, "schedule timer. delay=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 		   expires - jiffies, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735)  * throtl_schedule_next_dispatch - schedule the next dispatch cycle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736)  * @sq: the service_queue to schedule dispatch for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737)  * @force: force scheduling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739)  * Arm @sq->pending_timer so that the next dispatch cycle starts on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740)  * dispatch time of the first pending child.  Returns %true if either timer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741)  * is armed or there's no pending child left.  %false if the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742)  * dispatch window is still open and the caller should continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743)  * dispatching.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745)  * If @force is %true, the dispatch timer is always scheduled and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746)  * function is guaranteed to return %true.  This is to be used when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747)  * caller can't dispatch itself and needs to invoke pending_timer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748)  * unconditionally.  Note that forced scheduling is likely to induce a short
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749)  * delay before dispatch starts even if @sq->first_pending_disptime is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750)  * in the future and thus shouldn't be used in hot paths.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) static bool throtl_schedule_next_dispatch(struct throtl_service_queue *sq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 					  bool force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	/* any pending children left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	if (!sq->nr_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	update_min_dispatch_time(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	/* is the next dispatch time in the future? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	if (force || time_after(sq->first_pending_disptime, jiffies)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 		throtl_schedule_pending_timer(sq, sq->first_pending_disptime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	/* tell the caller to continue dispatching */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		bool rw, unsigned long start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	tg->bytes_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	tg->io_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	atomic_set(&tg->io_split_cnt[rw], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	 * The previous slice has expired. We must have trimmed it after the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	 * last bio dispatch, which means the bandwidth since the start of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	 * last slice was never used. Try to make use of that bandwidth while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	 * giving credit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	if (time_after_eq(start, tg->slice_start[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		tg->slice_start[rw] = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		   "[%c] new slice with credit start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		   tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	tg->bytes_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	tg->io_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	tg->slice_start[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	atomic_set(&tg->io_split_cnt[rw], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		   tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 					unsigned long jiffy_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	tg->slice_end[rw] = roundup(jiffy_end, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 				       unsigned long jiffy_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	throtl_set_slice_end(tg, rw, jiffy_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		   tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) /* Determine if previously allocated or extended slice is complete or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) /* Trim the used slices and adjust slice start accordingly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	unsigned long nr_slices, time_elapsed, io_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	u64 bytes_trim, tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	 * If bps are unlimited (-1), then the time slice doesn't get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	 * renewed. Don't try to trim the slice if it has already expired. A new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 	 * slice will start when appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	if (throtl_slice_used(tg, rw))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	 * A bio has been dispatched. Also adjust slice_end. It might happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	 * that the cgroup limit was initially very low, resulting in a high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	 * slice_end, but the limit was later bumped up and the bio was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	 * dispatched sooner; in that case we need to reduce slice_end. A high
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	 * bogus slice_end is bad because it does not allow a new slice to start.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	time_elapsed = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	nr_slices = time_elapsed / tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	if (!nr_slices)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	bytes_trim = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
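	/*
	 * Worked example with assumed numbers (not taken from this code): with
	 * HZ=100, throtl_slice=10 jiffies (100ms), a 1MB/s bps limit and
	 * nr_slices=3, bytes_trim = 1048576 * 10 * 3 / 100 = 314572, i.e.
	 * roughly 300KB of already-charged budget is forgiven below and
	 * slice_start advances by three full slices.
	 */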
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	if (!bytes_trim && !io_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	if (tg->bytes_disp[rw] >= bytes_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		tg->bytes_disp[rw] -= bytes_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		tg->bytes_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	if (tg->io_disp[rw] >= io_trim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		tg->io_disp[rw] -= io_trim;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		tg->io_disp[rw] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		   "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		   rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		   tg->slice_start[rw], tg->slice_end[rw], jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 				  u32 iops_limit, unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	unsigned int io_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	if (iops_limit == UINT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	jiffy_elapsed = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	/* Round up to the next throttle slice, wait time must be nonzero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	 * jiffy_elapsed_rnd should not be a big value: the minimum iops can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	 * be 1, so at most jiffy_elapsed_rnd should be equivalent to 1 second,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	 * as we will allow a dispatch after 1 second and after that the slice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	 * should have been trimmed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	tmp = (u64)iops_limit * jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	if (tmp > UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		io_allowed = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 		io_allowed = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
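	/*
	 * Example with assumed numbers: with HZ=100, iops_limit=100 and
	 * jiffy_elapsed_rnd=10 (one 100ms slice), io_allowed = 100 * 10 / 100
	 * = 10, so up to 10 IOs fit in the current slice before a bio has to
	 * wait.
	 */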
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	if (tg->io_disp[rw] + 1 <= io_allowed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	/* Calc approx time to dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 		*wait = jiffy_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 				 u64 bps_limit, unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	u64 bytes_allowed, extra_bytes, tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	unsigned int bio_size = throtl_bio_data_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	if (bps_limit == U64_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	/* Slice has just started. Consider one slice interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	if (!jiffy_elapsed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		jiffy_elapsed_rnd = tg->td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	tmp = bps_limit * jiffy_elapsed_rnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	do_div(tmp, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	bytes_allowed = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	/* Calc approx time to dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	jiffy_wait = div64_u64(extra_bytes * HZ, bps_limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
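	/*
	 * Example with assumed numbers: if extra_bytes is 512KB over a 1MB/s
	 * limit with HZ=100, jiffy_wait = 524288 * 100 / 1048576 = 50 jiffies,
	 * i.e. roughly half a second of extra wait.
	 */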
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	if (!jiffy_wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		jiffy_wait = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	 * This wait time does not take into account the rounding up we did.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	 * Add that time as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		*wait = jiffy_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993)  * Returns whether a bio can be dispatched or not, and the approx number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994)  * jiffies to wait before this bio is within the IO rate and can be dispatched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			    unsigned long *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	u64 bps_limit = tg_bps_limit(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	u32 iops_limit = tg_iops_limit(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	 * Currently the whole state machine of the group depends on the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	 * bio queued in the group's bio list. So one should not be calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	 * this function with a different bio if there are other bios
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	 * queued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	BUG_ON(tg->service_queue.nr_queued[rw] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	/* If tg->bps = -1, then BW is unlimited */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	if (bps_limit == U64_MAX && iops_limit == UINT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	 * If the previous slice expired, start a new one; otherwise renew/extend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	 * the existing slice to make sure it is at least throtl_slice interval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	 * long since now. A new slice is started only for an empty throttle group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	 * If there is a queued bio, that means there should be an active
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	 * slice and it should be extended instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 		throtl_start_new_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 		if (time_before(tg->slice_end[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 		    jiffies + tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 			throtl_extend_slice(tg, rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 				jiffies + tg->td->throtl_slice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	if (iops_limit != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 			*wait = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
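	/*
	 * The bio must satisfy both the bps and the iops limit, so it has to
	 * wait for the later of the two deadlines.
	 */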
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	max_wait = max(bps_wait, iops_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	if (wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		*wait = max_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	if (time_before(tg->slice_end[rw], jiffies + max_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		throtl_extend_slice(tg, rw, jiffies + max_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	unsigned int bio_size = throtl_bio_data_size(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	/* Charge the bio to the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	tg->bytes_disp[rw] += bio_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	tg->io_disp[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	tg->last_bytes_disp[rw] += bio_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	tg->last_io_disp[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	 * BIO_THROTTLED is used to prevent the same bio from being throttled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	 * more than once, as a throttled bio will go through blk-throtl a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	 * second time when it eventually gets issued.  Set it when a bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	 * is being charged to a tg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	if (!bio_flagged(bio, BIO_THROTTLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 		bio_set_flag(bio, BIO_THROTTLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)  * throtl_add_bio_tg - add a bio to the specified throtl_grp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)  * @bio: bio to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)  * @qn: qnode to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)  * @tg: the target throtl_grp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)  * Add @bio to @tg's service_queue using @qn.  If @qn is not specified,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)  * tg->qnode_on_self[] is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 			      struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (!qn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		qn = &tg->qnode_on_self[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	 * If @tg doesn't currently have any bios queued in the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	 * direction, queueing @bio can change when @tg should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	 * dispatched.  Mark that @tg was empty.  This is automatically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	 * cleared on the next tg_update_disptime().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	if (!sq->nr_queued[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 		tg->flags |= THROTL_TG_WAS_EMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	sq->nr_queued[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	throtl_enqueue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) static void tg_update_disptime(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	bio = throtl_peek_queued(&sq->queued[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	if (bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 		tg_may_dispatch(tg, bio, &read_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	bio = throtl_peek_queued(&sq->queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	if (bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 		tg_may_dispatch(tg, bio, &write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 
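	/*
	 * The group becomes eligible as soon as either direction can dispatch,
	 * so take the shorter of the two waits.
	 */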
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	min_wait = min(read_wait, write_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	disptime = jiffies + min_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	/* Update dispatch time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	throtl_dequeue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	tg->disptime = disptime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	throtl_enqueue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	/* see throtl_add_bio_tg() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	tg->flags &= ~THROTL_TG_WAS_EMPTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 
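/*
 * If the parent's slice has already expired, start a new one for it that is
 * back-dated to the child's slice_start, so the time (and hence budget)
 * already elapsed in the child's slice is credited to the parent.
 */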
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 					struct throtl_grp *parent_tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	if (throtl_slice_used(parent_tg, rw)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		throtl_start_new_slice_with_credit(parent_tg, rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 				child_tg->slice_start[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	struct throtl_service_queue *parent_sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	struct throtl_grp *parent_tg = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	struct throtl_grp *tg_to_put = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	 * @bio is being transferred from @tg to @parent_sq.  Popping a bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	 * from @tg may put its reference and @parent_sq might end up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	 * getting released prematurely.  Remember the tg to put and put it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	 * after @bio is transferred to @parent_sq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	sq->nr_queued[rw]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	throtl_charge_bio(tg, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	 * If our parent is another tg, we just need to transfer @bio to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	 * the parent using throtl_add_bio_tg().  If our parent is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	 * @td->service_queue, @bio is ready to be issued.  Put it on its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	 * bio_lists[] and decrease total number queued.  The caller is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	 * responsible for issuing these bios.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	if (parent_tg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 		throtl_add_bio_tg(bio, &tg->qnode_on_parent[rw], parent_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 		start_parent_slice_with_credit(tg, parent_tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 				     &parent_sq->queued[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		BUG_ON(tg->td->nr_queued[rw] <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		tg->td->nr_queued[rw]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	throtl_trim_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	if (tg_to_put)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		blkg_put(tg_to_blkg(tg_to_put));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static int throtl_dispatch_tg(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	unsigned int nr_reads = 0, nr_writes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	unsigned int max_nr_reads = THROTL_GRP_QUANTUM * 3 / 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	unsigned int max_nr_writes = THROTL_GRP_QUANTUM - max_nr_reads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	/* Try to dispatch 75% READS and 25% WRITES */
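	/* With THROTL_GRP_QUANTUM == 8 that is at most 6 reads and 2 writes per round. */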
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	       tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		tg_dispatch_one_bio(tg, bio_data_dir(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 		nr_reads++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 		if (nr_reads >= max_nr_reads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	       tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 		tg_dispatch_one_bio(tg, bio_data_dir(bio));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		nr_writes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 		if (nr_writes >= max_nr_writes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	return nr_reads + nr_writes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	unsigned int nr_disp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 		struct throtl_service_queue *sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		if (!parent_sq->nr_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 		tg = throtl_rb_first(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		if (!tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		if (time_before(jiffies, tg->disptime))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		throtl_dequeue_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 		nr_disp += throtl_dispatch_tg(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 		sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 		if (sq->nr_queued[0] || sq->nr_queued[1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 			tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		if (nr_disp >= THROTL_QUANTUM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	return nr_disp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static bool throtl_can_upgrade(struct throtl_data *td,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	struct throtl_grp *this_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)  * throtl_pending_timer_fn - timer function for service_queue->pending_timer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)  * @t: the pending_timer member of the throtl_service_queue being serviced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)  * This timer is armed when a child throtl_grp with active bios becomes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)  * pending and queued on the service_queue's pending_tree and expires when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)  * the first child throtl_grp should be dispatched.  This function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)  * dispatches bios from the children throtl_grps to the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)  * service_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)  * If the parent's parent is another throtl_grp, dispatching is propagated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)  * by either arming its pending_timer or repeating dispatch directly.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)  * the top-level service_tree is reached, throtl_data->dispatch_work is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)  * kicked so that the ready bios are issued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) static void throtl_pending_timer_fn(struct timer_list *t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	struct throtl_service_queue *sq = from_timer(sq, t, pending_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	struct throtl_grp *tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	struct throtl_data *td = sq_to_td(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	struct request_queue *q = td->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	bool dispatched;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	if (throtl_can_upgrade(td, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		throtl_upgrade_state(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	parent_sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	dispatched = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 			   sq->nr_queued[READ], sq->nr_queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		ret = throtl_select_dispatch(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 			throtl_log(sq, "bios disp=%u", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 			dispatched = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		if (throtl_schedule_next_dispatch(sq, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		/* this dispatch window is still open, relax and repeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		cpu_relax();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 		spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	if (!dispatched)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	if (parent_sq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 		/* @parent_sq is another throtl_grp, propagate dispatch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 		if (tg->flags & THROTL_TG_WAS_EMPTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 			tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 				/* window is already open, repeat dispatching */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 				sq = parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 				tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 				goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 		/* reached the top-level, queue issuing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		queue_work(kthrotld_workqueue, &td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)  * blk_throtl_dispatch_work_fn - work function for throtl_data->dispatch_work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)  * @work: work item being executed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)  * This function is queued for execution when bios reach the bio_lists[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)  * of throtl_data->service_queue.  Those bios are ready and issued by this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)  * function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) static void blk_throtl_dispatch_work_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	struct throtl_data *td = container_of(work, struct throtl_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 					      dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	struct throtl_service_queue *td_sq = &td->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	struct request_queue *q = td->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	struct bio_list bio_list_on_stack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	struct bio *bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	bio_list_init(&bio_list_on_stack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
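	/*
	 * Collect all ready bios under the queue lock first, then submit them
	 * outside the lock under a plug so bio submission does not run with
	 * the queue lock held.
	 */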
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	for (rw = READ; rw <= WRITE; rw++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 			bio_list_add(&bio_list_on_stack, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	if (!bio_list_empty(&bio_list_on_stack)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 		blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 		while ((bio = bio_list_pop(&bio_list_on_stack)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 			submit_bio_noacct(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 		blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 			      int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	u64 v = *(u64 *)((void *)tg + off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
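	/* An unlimited limit (U64_MAX) means nothing is configured; print nothing for it. */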
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	if (v == U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	return __blkg_prfill_u64(sf, pd, v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 			       int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	unsigned int v = *(unsigned int *)((void *)tg + off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	if (v == UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	return __blkg_prfill_u64(sf, pd, v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static int tg_print_conf_u64(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) static int tg_print_conf_uint(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) static void tg_conf_updated(struct throtl_grp *tg, bool global)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 		   tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 		   tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	 * Update has_rules[] flags for the updated tg's subtree.  A tg is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	 * considered to have rules if either the tg itself or any of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	 * ancestors has rules.  This identifies groups without any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	 * restrictions in the whole hierarchy and allows them to bypass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	 * blk-throttle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	blkg_for_each_descendant_pre(blkg, pos_css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 			global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		struct throtl_grp *this_tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 		struct throtl_grp *parent_tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		tg_update_has_rules(this_tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		/* ignore root/second level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 		    !blkg->parent->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 		parent_tg = blkg_to_tg(blkg->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		 * make sure all children have a lower idle time threshold and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		 * a higher latency target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		this_tg->idletime_threshold = min(this_tg->idletime_threshold,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 				parent_tg->idletime_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		this_tg->latency_target = max(this_tg->latency_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 				parent_tg->latency_target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	 * We're already holding queue_lock and know @tg is valid.  Let's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	 * apply the new config directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	 * Restart the slices for both READ and WRITE. It might happen that a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	 * group's limits are dropped suddenly and we don't want to account
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	 * recently dispatched IO against the new low rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	throtl_start_new_slice(tg, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	throtl_start_new_slice(tg, WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	if (tg->flags & THROTL_TG_PENDING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 		tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 		throtl_schedule_next_dispatch(sq->parent_sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) static ssize_t tg_set_conf(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 			   char *buf, size_t nbytes, loff_t off, bool is_u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	struct blkcg *blkcg = css_to_blkcg(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	struct blkg_conf_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	u64 v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	if (sscanf(ctx.body, "%llu", &v) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 		goto out_finish;
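	/* Writing 0 clears the limit; it is stored internally as "unlimited". */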
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	if (!v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 		v = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	tg = blkg_to_tg(ctx.blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	if (is_u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 		*(u64 *)((void *)tg + of_cft(of)->private) = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 		*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	tg_conf_updated(tg, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	blkg_conf_finish(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 			       char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	return tg_set_conf(of, buf, nbytes, off, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 				char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	return tg_set_conf(of, buf, nbytes, off, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) static int tg_print_rwstat(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 			  blkg_prfill_rwstat, &blkcg_policy_throtl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 			  seq_cft(sf)->private, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 				      struct blkg_policy_data *pd, int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	struct blkg_rwstat_sample sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 				  &sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	return __blkg_prfill_rwstat(sf, pd, &sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 			  tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 			  seq_cft(sf)->private, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 
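/*
 * Added annotation: legacy (cgroup v1) interface.  Per-device limits are
 * written as "MAJ:MIN value", alongside read-only io_service_bytes and
 * io_serviced statistics.  For example (illustrative device numbers):
 *
 *	echo "8:16 1048576" > blkio.throttle.read_bps_device
 */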
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) static struct cftype throtl_legacy_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 		.name = "throttle.read_bps_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		.private = offsetof(struct throtl_grp, bps[READ][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		.seq_show = tg_print_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 		.write = tg_set_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 		.name = "throttle.write_bps_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		.private = offsetof(struct throtl_grp, bps[WRITE][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 		.seq_show = tg_print_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		.write = tg_set_conf_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		.name = "throttle.read_iops_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 		.private = offsetof(struct throtl_grp, iops[READ][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		.seq_show = tg_print_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		.write = tg_set_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		.name = "throttle.write_iops_device",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		.private = offsetof(struct throtl_grp, iops[WRITE][LIMIT_MAX]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		.seq_show = tg_print_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		.write = tg_set_conf_uint,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		.name = "throttle.io_service_bytes",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		.private = offsetof(struct throtl_grp, stat_bytes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		.seq_show = tg_print_rwstat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		.name = "throttle.io_service_bytes_recursive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		.private = offsetof(struct throtl_grp, stat_bytes),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		.seq_show = tg_print_rwstat_recursive,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		.name = "throttle.io_serviced",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		.private = offsetof(struct throtl_grp, stat_ios),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		.seq_show = tg_print_rwstat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 		.name = "throttle.io_serviced_recursive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		.private = offsetof(struct throtl_grp, stat_ios),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 		.seq_show = tg_print_rwstat_recursive,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
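/*
 * Added annotation: print one device line of the "low"/"max" file.  If every
 * setting of the group is still at its default, the line is omitted
 * entirely; for the "low" file the idle and latency settings are printed as
 * well.
 */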
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 			 int off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	struct throtl_grp *tg = pd_to_tg(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	const char *dname = blkg_dev_name(pd->blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	char bufs[4][21] = { "max", "max", "max", "max" };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	u64 bps_dft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	unsigned int iops_dft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	char idle_time[26] = "";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	char latency_time[26] = "";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	if (!dname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	if (off == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 		bps_dft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		iops_dft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 		bps_dft = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		iops_dft = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	if (tg->bps_conf[READ][off] == bps_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	    tg->bps_conf[WRITE][off] == bps_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	    tg->iops_conf[READ][off] == iops_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	    tg->iops_conf[WRITE][off] == iops_dft &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	    (off != LIMIT_LOW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	if (tg->bps_conf[READ][off] != U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 		snprintf(bufs[0], sizeof(bufs[0]), "%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 			tg->bps_conf[READ][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	if (tg->bps_conf[WRITE][off] != U64_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 		snprintf(bufs[1], sizeof(bufs[1]), "%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 			tg->bps_conf[WRITE][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	if (tg->iops_conf[READ][off] != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		snprintf(bufs[2], sizeof(bufs[2]), "%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 			tg->iops_conf[READ][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	if (tg->iops_conf[WRITE][off] != UINT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 		snprintf(bufs[3], sizeof(bufs[3]), "%u",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 			tg->iops_conf[WRITE][off]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	if (off == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		if (tg->idletime_threshold_conf == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 			strcpy(idle_time, " idle=max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 			snprintf(idle_time, sizeof(idle_time), " idle=%lu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 				tg->idletime_threshold_conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 		if (tg->latency_target_conf == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 			strcpy(latency_time, " latency=max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 			snprintf(latency_time, sizeof(latency_time),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 				" latency=%lu", tg->latency_target_conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 		   dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		   latency_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static int tg_print_limit(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 			  &blkcg_policy_throtl, seq_cft(sf)->private, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 
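/*
 * Added annotation: write handler for the "low" and "max" files.  The body
 * names a device followed by "key=value" tokens (rbps, wbps, riops, wiops,
 * plus idle and latency for the "low" file); the value "max" means
 * unlimited.  For example (illustrative device numbers):
 *
 *	echo "8:16 rbps=2097152 wiops=max" > io.max
 */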
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) static ssize_t tg_set_limit(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 			  char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 	struct blkcg *blkcg = css_to_blkcg(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	struct blkg_conf_ctx ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 	struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	u64 v[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	unsigned long idle_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	unsigned long latency_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	int index = of_cft(of)->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 	tg = blkg_to_tg(ctx.blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	v[0] = tg->bps_conf[READ][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 	v[1] = tg->bps_conf[WRITE][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 	v[2] = tg->iops_conf[READ][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 	v[3] = tg->iops_conf[WRITE][index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 	idle_time = tg->idletime_threshold_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	latency_time = tg->latency_target_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 		char tok[27];	/* wiops=18446744073709551616 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 		char *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		u64 val = U64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 		int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 		if (sscanf(ctx.body, "%26s%n", tok, &len) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 		if (tok[0] == '\0')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 		ctx.body += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 		p = tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 		strsep(&p, "=");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max")))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 			goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 		ret = -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 		if (!val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 			goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 		if (!strcmp(tok, "rbps") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 			v[0] = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 		else if (!strcmp(tok, "wbps") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 			v[1] = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 		else if (!strcmp(tok, "riops") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 			v[2] = min_t(u64, val, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 		else if (!strcmp(tok, "wiops") && val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 			v[3] = min_t(u64, val, UINT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 			idle_time = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 			latency_time = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 			goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	tg->bps_conf[READ][index] = v[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	tg->bps_conf[WRITE][index] = v[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	tg->iops_conf[READ][index] = v[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	tg->iops_conf[WRITE][index] = v[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	if (index == LIMIT_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		tg->bps[READ][index] = v[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 		tg->bps[WRITE][index] = v[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 		tg->iops[READ][index] = v[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 		tg->iops[WRITE][index] = v[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 		tg->bps_conf[READ][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		tg->bps_conf[WRITE][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		tg->iops_conf[READ][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		tg->iops_conf[WRITE][LIMIT_MAX]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	tg->idletime_threshold_conf = idle_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	tg->latency_target_conf = latency_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	/* force the user to configure all settings for the low limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 		tg->bps[READ][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 		tg->bps[WRITE][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 		tg->iops[READ][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 		tg->iops[WRITE][LIMIT_LOW] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		tg->latency_target = DFL_LATENCY_TARGET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	} else if (index == LIMIT_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 		tg->idletime_threshold = tg->idletime_threshold_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		tg->latency_target = tg->latency_target_conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	blk_throtl_update_limit_valid(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	if (tg->td->limit_valid[LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 		if (index == LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 			tg->td->limit_index = LIMIT_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 		tg->td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	tg_conf_updated(tg, index == LIMIT_LOW &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 		tg->td->limit_valid[LIMIT_LOW]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 	blkg_conf_finish(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) static struct cftype throtl_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 		.name = "low",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 		.flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 		.seq_show = tg_print_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 		.write = tg_set_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 		.private = LIMIT_LOW,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 		.name = "max",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 		.flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 		.seq_show = tg_print_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 		.write = tg_set_limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 		.private = LIMIT_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) static void throtl_shutdown_wq(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	struct throtl_data *td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 	cancel_work_sync(&td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) static struct blkcg_policy blkcg_policy_throtl = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	.dfl_cftypes		= throtl_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	.legacy_cftypes		= throtl_legacy_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	.pd_alloc_fn		= throtl_pd_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 	.pd_init_fn		= throtl_pd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 	.pd_online_fn		= throtl_pd_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	.pd_offline_fn		= throtl_pd_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	.pd_free_fn		= throtl_pd_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
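/*
 * Added annotation: return the oldest time at which this group last reached
 * one of its low limits; a direction with no low limit configured is treated
 * as "now".
 */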
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 	unsigned long rtime = jiffies, wtime = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	if (tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 		rtime = tg->last_low_overflow_time[READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	if (tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		wtime = tg->last_low_overflow_time[WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 	return min(rtime, wtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) /* tg should not be an intermediate node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) static unsigned long tg_last_low_overflow_time(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	struct throtl_grp *parent = tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	unsigned long ret = __tg_last_low_overflow_time(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 		parent_sq = parent->service_queue.parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 		parent = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 		if (!parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 		 * A parent without a low limit always reaches its low limit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 		 * so its overflow time is useless for the children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 		if (!parent->bps[READ][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 		    !parent->iops[READ][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 		    !parent->bps[WRITE][LIMIT_LOW] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 		    !parent->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 		if (time_after(__tg_last_low_overflow_time(parent), ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 			ret = __tg_last_low_overflow_time(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static bool throtl_tg_is_idle(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	 * The cgroup is considered idle if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	 * - the current idle period exceeds 4 times the idletime threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	 *   (capped at a fixed maximum, in case the threshold is too big)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	 * - the average think time exceeds the idletime threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	 * - IO latency is mostly below the latency target
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 	unsigned long time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	ret = tg->latency_target == DFL_LATENCY_TARGET ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 	      tg->avg_idletime > tg->idletime_threshold ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	      (tg->latency_target && tg->bio_cnt &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	throtl_log(&tg->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		tg->bio_cnt, ret, tg->td->scale);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	bool read_limit, write_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	 * If the cgroup reaches its low limit (a low limit of 0 is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	 * considered reached), it's ok to upgrade to the next limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 	read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 	if (!read_limit && !write_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	if (read_limit && sq->nr_queued[READ] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	    (!write_limit || sq->nr_queued[WRITE]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	if (write_limit && sq->nr_queued[WRITE] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	    (!read_limit || sq->nr_queued[READ]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	if (time_after_eq(jiffies,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 		tg_last_low_overflow_time(tg) + tg->td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 	    throtl_tg_is_idle(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 		if (throtl_tg_can_upgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 		tg = sq_to_tg(tg->service_queue.parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 		if (!tg || !tg_to_blkg(tg)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) static bool throtl_can_upgrade(struct throtl_data *td,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	struct throtl_grp *this_tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	if (td->limit_index != LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 	if (time_before(jiffies, td->low_downgrade_time + td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 		struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 		if (tg == this_tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 		if (!throtl_hierarchy_can_upgrade(tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) static void throtl_upgrade_check(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 	unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	if (tg->td->limit_index != LIMIT_LOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 	tg->last_check_time = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	if (!time_after_eq(now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 	     __tg_last_low_overflow_time(tg) + tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	if (throtl_can_upgrade(tg->td, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		throtl_upgrade_state(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 
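/*
 * Added annotation: move the whole queue back to the max limits and kick
 * every group's dispatch so that bios held back by the low limits are sent
 * out promptly.
 */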
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) static void throtl_upgrade_state(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	throtl_log(&td->service_queue, "upgrade to max");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	td->low_upgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 	td->scale = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 	blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 		struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 		struct throtl_service_queue *sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 		tg->disptime = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 		throtl_select_dispatch(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 		throtl_schedule_next_dispatch(sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 	throtl_select_dispatch(&td->service_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	throtl_schedule_next_dispatch(&td->service_queue, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	queue_work(kthrotld_workqueue, &td->dispatch_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 
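/*
 * Added annotation: back off one upgrade step.  While td->scale is still
 * non-zero only the upgrade time is pulled back; once it hits zero the queue
 * really drops to LIMIT_LOW.
 */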
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) static void throtl_downgrade_state(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	td->scale /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	if (td->scale) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 		td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	td->limit_index = LIMIT_LOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	td->low_downgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) static bool throtl_tg_can_downgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	struct throtl_data *td = tg->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 	 * If the cgroup is below its low limit, consider downgrading and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 	 * throttling the other cgroups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	if (time_after_eq(now, td->low_upgrade_time + td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 	    time_after_eq(now, tg_last_low_overflow_time(tg) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 					td->throtl_slice) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	    (!throtl_tg_is_idle(tg) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	     !list_empty(&tg_to_blkg(tg)->blkcg->css.children)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) static bool throtl_hierarchy_can_downgrade(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 		if (!throtl_tg_can_downgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 		tg = sq_to_tg(tg->service_queue.parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 		if (!tg || !tg_to_blkg(tg)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 
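/*
 * Added annotation: compute the bps/iops the group actually achieved since
 * the last check, refresh the overflow timestamps for directions that still
 * reach their low limits, and downgrade the queue to LIMIT_LOW if the whole
 * hierarchy qualifies.
 */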
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) static void throtl_downgrade_check(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 	uint64_t bps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 	unsigned int iops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 	unsigned long elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 	unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 	if (tg->td->limit_index != LIMIT_MAX ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	    !tg->td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 	if (time_after(tg->last_check_time + tg->td->throtl_slice, now))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	elapsed_time = now - tg->last_check_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	tg->last_check_time = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 	if (time_before(now, tg_last_low_overflow_time(tg) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 			tg->td->throtl_slice))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	if (tg->bps[READ][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		bps = tg->last_bytes_disp[READ] * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 		do_div(bps, elapsed_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 		if (bps >= tg->bps[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 			tg->last_low_overflow_time[READ] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	if (tg->bps[WRITE][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 		bps = tg->last_bytes_disp[WRITE] * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 		do_div(bps, elapsed_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 		if (bps >= tg->bps[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 			tg->last_low_overflow_time[WRITE] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	if (tg->iops[READ][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 		if (iops >= tg->iops[READ][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 			tg->last_low_overflow_time[READ] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	if (tg->iops[WRITE][LIMIT_LOW]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 		if (iops >= tg->iops[WRITE][LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 			tg->last_low_overflow_time[WRITE] = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	 * If the cgroup is below its low limit, consider downgrading and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	 * throttling the other cgroups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	if (throtl_hierarchy_can_downgrade(tg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 		throtl_downgrade_state(tg->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	tg->last_bytes_disp[READ] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	tg->last_bytes_disp[WRITE] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 	tg->last_io_disp[READ] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 	tg->last_io_disp[WRITE] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 
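/*
 * Added annotation: track the group's average "think time", the gap between
 * the completion of its previous IO and the arrival of this one, as an
 * exponentially weighted average in units of roughly a microsecond
 * (ktime_get_ns() >> 10).
 */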
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) static void blk_throtl_update_idletime(struct throtl_grp *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	unsigned long now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	unsigned long last_finish_time = tg->last_finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	if (last_finish_time == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 	now = ktime_get_ns() >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	if (now <= last_finish_time ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	    last_finish_time == tg->checked_last_finish_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	tg->avg_idletime = (tg->avg_idletime * 7 + now - last_finish_time) >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	tg->checked_last_finish_time = last_finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
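/*
 * Added annotation: at most once a second, fold the per-cpu latency samples
 * into td->tmp_buckets and refresh the per-bucket average latencies (an EWMA
 * kept non-decreasing across the size buckets), which later serve as the
 * baseline when judging whether a group's IO meets its latency target.
 */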
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) static void throtl_update_latency_buckets(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	int i, cpu, rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	unsigned long last_latency[2] = { 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 	unsigned long latency[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 	if (!blk_queue_nonrot(td->queue) || !td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 	if (time_before(jiffies, td->last_calculate_time + HZ))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	td->last_calculate_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 	memset(avg_latency, 0, sizeof(avg_latency));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	for (rw = READ; rw <= WRITE; rw++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 			struct latency_bucket *tmp = &td->tmp_buckets[rw][i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 			for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 				struct latency_bucket *bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 				/* this isn't race free, but ok in practice */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 				bucket = per_cpu_ptr(td->latency_buckets[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 					cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 				tmp->total_latency += bucket[i].total_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 				tmp->samples += bucket[i].samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 				bucket[i].total_latency = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 				bucket[i].samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 			if (tmp->samples >= 32) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 				int samples = tmp->samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 				latency[rw] = tmp->total_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 				tmp->total_latency = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 				tmp->samples = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 				latency[rw] /= samples;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 				if (latency[rw] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 				avg_latency[rw][i].latency = latency[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 	for (rw = READ; rw <= WRITE; rw++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 			if (!avg_latency[rw][i].latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 				if (td->avg_buckets[rw][i].latency < last_latency[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 					td->avg_buckets[rw][i].latency =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 						last_latency[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 			if (!td->avg_buckets[rw][i].valid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 				latency[rw] = avg_latency[rw][i].latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 				latency[rw] = (td->avg_buckets[rw][i].latency * 7 +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 					avg_latency[rw][i].latency) >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 			td->avg_buckets[rw][i].latency = max(latency[rw],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 				last_latency[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 			td->avg_buckets[rw][i].valid = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 			last_latency[rw] = td->avg_buckets[rw][i].latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 		throtl_log(&td->service_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 			"Latency bucket %d: read latency=%ld, read valid=%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 			"write latency=%ld, write valid=%d", i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 			td->avg_buckets[READ][i].latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 			td->avg_buckets[READ][i].valid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 			td->avg_buckets[WRITE][i].latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 			td->avg_buckets[WRITE][i].valid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) static inline void throtl_update_latency_buckets(struct throtl_data *td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 
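/*
 * Added annotation: a bio that gets split charges one extra IO against the
 * iops accounting of its group and each ancestor, stopping at the first
 * level without throttling rules for this direction.
 */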
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) void blk_throtl_charge_bio_split(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 	struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	struct throtl_grp *parent = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	struct throtl_service_queue *parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 		if (!parent->has_rules[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 		atomic_inc(&parent->io_split_cnt[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 		atomic_inc(&parent->last_io_split_cnt[rw]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 		parent_sq = parent->service_queue.parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 		parent = sq_to_tg(parent_sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	} while (parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 
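/*
 * Added annotation: main entry point.  Charge @bio against the limits of its
 * group and its ancestors.  Returns true if the bio was queued for later
 * dispatch and false if the caller may issue it now.
 */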
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) bool blk_throtl_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	struct request_queue *q = bio->bi_disk->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 	struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	struct throtl_qnode *qn = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 	struct throtl_grp *tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 	struct throtl_service_queue *sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 	bool rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	bool throttled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	struct throtl_data *td = tg->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 	/* see throtl_charge_bio() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 	if (bio_flagged(bio, BIO_THROTTLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) 		blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 				bio->bi_iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 	if (!tg->has_rules[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	throtl_update_latency_buckets(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	blk_throtl_update_idletime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 	sq = &tg->service_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 		if (tg->last_low_overflow_time[rw] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 			tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 		throtl_downgrade_check(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 		throtl_upgrade_check(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 		/* throtl is FIFO - if bios are already queued, this one queues too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 		if (sq->nr_queued[rw])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 		/* if above limits, break to queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 		if (!tg_may_dispatch(tg, bio, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 			tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 			if (throtl_can_upgrade(td, tg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 				throtl_upgrade_state(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 				goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 		/* within limits, let's charge and dispatch directly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 		throtl_charge_bio(tg, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 		 * We need to trim the slice even when bios are not being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 		 * queued; otherwise a bio might not get queued for a long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 		 * time, the slice keeps on extending, and trim is not called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 		 * for a long time. If the limits are then reduced suddenly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 		 * all the IO dispatched so far is taken into account at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 		 * new low rate and newly queued IO gets a really long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 		 * dispatch time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 		 * So keep on trimming the slice even if the bio is not queued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 		throtl_trim_slice(tg, rw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 		 * @bio passed through this layer without being throttled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 		 * Climb up the ladder.  If we're already at the top, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 		 * can be executed directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 		qn = &tg->qnode_on_parent[rw];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 		sq = sq->parent_sq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 		tg = sq_to_tg(sq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 		if (!tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 	/* out-of-limit, queue to @tg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 		   rw == READ ? 'R' : 'W',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 		   tg->bytes_disp[rw], bio->bi_iter.bi_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 		   tg_bps_limit(tg, rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 		   tg->io_disp[rw], tg_iops_limit(tg, rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 		   sq->nr_queued[READ], sq->nr_queued[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	tg->last_low_overflow_time[rw] = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 	td->nr_queued[rw]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 	throtl_add_bio_tg(bio, qn, tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	throttled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	 * Update @tg's dispatch time and force schedule dispatch if @tg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	 * was empty before @bio.  The forced scheduling isn't likely to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	 * cause undue delay as @bio is likely to be dispatched directly if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	 * its @tg's disptime is not in the future.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	if (tg->flags & THROTL_TG_WAS_EMPTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 		tg_update_disptime(tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	bio_set_flag(bio, BIO_THROTTLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	if (throttled || !td->track_bio_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	return throttled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
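/*
 * Accumulate one completion latency sample into the per-CPU bucket that
 * matches the request size.  Samples are only collected for READ/WRITE
 * while the LOW limit is in effect and only on non-rotational queues.
 */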
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) static void throtl_track_latency(struct throtl_data *td, sector_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	int op, unsigned long time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	struct latency_bucket *latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	int index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	if (!td || td->limit_index != LIMIT_LOW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	    !(op == REQ_OP_READ || op == REQ_OP_WRITE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	    !blk_queue_nonrot(td->queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	index = request_bucket_index(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	latency = get_cpu_ptr(td->latency_buckets[op]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 	latency[index].total_latency += time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 	latency[index].samples++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	put_cpu_ptr(td->latency_buckets[op]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 
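/*
 * Request completion hook: feed the request's service time, shifted from
 * nanoseconds to approximately microseconds (>> 10), into the latency
 * buckets.
 */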
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) void blk_throtl_stat_add(struct request *rq, u64 time_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	struct request_queue *q = rq->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 	struct throtl_data *td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 			     time_ns >> 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 
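/*
 * Bio completion hook.  Records the group's last finish time and, when a
 * LOW limit is configured, samples the bio's latency into the buckets
 * (unless latency tracking was skipped for this bio) and counts bios whose
 * latency exceeds the group's target (bad_bio_cnt) against the total
 * (bio_cnt); both counters are periodically halved.
 */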
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) void blk_throtl_bio_endio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 	struct throtl_grp *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	u64 finish_time_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 	unsigned long finish_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	unsigned long start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	unsigned long lat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 	int rw = bio_data_dir(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	if (!blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	tg = blkg_to_tg(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	if (!tg->td->limit_valid[LIMIT_LOW])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	finish_time_ns = ktime_get_ns();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 	tg->last_finish_time = finish_time_ns >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 	start_time = bio_issue_time(&bio->bi_issue) >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	finish_time = __bio_issue_time(finish_time_ns) >> 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 	if (!start_time || finish_time <= start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	lat = finish_time - start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	/* this path only tracks latency for bio-based drivers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 				     bio_op(bio), lat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 	if (tg->latency_target && lat >= tg->td->filtered_latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 		int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 		unsigned int threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 		threshold = tg->td->avg_buckets[rw][bucket].latency +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 			tg->latency_target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 		if (lat > threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 			tg->bad_bio_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 		 * This is not race free, so the count can be wrong and a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 		 * cgroup may end up being throttled on inaccurate data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 		tg->bio_cnt++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 	if (time_after(jiffies, tg->bio_cnt_reset_time) || tg->bio_cnt > 1024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 		tg->bio_cnt_reset_time = tg->td->throtl_slice + jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 		tg->bio_cnt /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 		tg->bad_bio_cnt /= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 
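/*
 * Per-queue setup: allocate the throtl_data and its per-CPU latency
 * buckets, initialize the dispatch work and root service queue, start in
 * LIMIT_MAX mode, and activate the throttling blkcg policy on the queue.
 */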
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) int blk_throtl_init(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 	struct throtl_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	if (!td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 	td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 		LATENCY_BUCKET_SIZE, __alignof__(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 	if (!td->latency_buckets[READ]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 		kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 	td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 		LATENCY_BUCKET_SIZE, __alignof__(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 	if (!td->latency_buckets[WRITE]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 		free_percpu(td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 	throtl_service_queue_init(&td->service_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	q->td = td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 	td->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 	td->limit_valid[LIMIT_MAX] = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 	td->limit_index = LIMIT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 	td->low_upgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	td->low_downgrade_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 	/* activate policy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 	ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 		free_percpu(td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 		free_percpu(td->latency_buckets[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 		kfree(td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 
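/*
 * Per-queue teardown: stop the pending-dispatch timer and any queued
 * dispatch work, deactivate the policy, then free the latency buckets and
 * the throtl_data.
 */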
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) void blk_throtl_exit(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 	BUG_ON(!q->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 	del_timer_sync(&q->td->service_queue.pending_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	throtl_shutdown_wq(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 	free_percpu(q->td->latency_buckets[READ]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 	free_percpu(q->td->latency_buckets[WRITE]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 	kfree(q->td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 
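/*
 * Called when the queue is registered: pick the default slice length and
 * latency filter based on whether the device is rotational (seeding the HD
 * baseline latency for rotational devices), and decide whether bio latency
 * is tracked directly (bio-based queues) or via blk-stat accounting
 * (blk-mq queues).
 */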
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) void blk_throtl_register_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 	struct throtl_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 	td = q->td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	BUG_ON(!td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 	if (blk_queue_nonrot(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 		td->filtered_latency = LATENCY_FILTERED_SSD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 		td->throtl_slice = DFL_THROTL_SLICE_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 		td->filtered_latency = LATENCY_FILTERED_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 		for (i = 0; i < LATENCY_BUCKET_SIZE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 			td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 			td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 	/* if no low limit, use previous default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 	td->throtl_slice = DFL_THROTL_SLICE_HD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 	td->track_bio_latency = !queue_is_mq(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 	if (!td->track_bio_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 		blk_stat_enable_accounting(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
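/*
 * sysfs interface for the throttling sample time: shown and stored in
 * milliseconds, kept internally in jiffies.  Stores outside
 * (0, MAX_THROTL_SLICE] are rejected.
 */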
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	if (!q->td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	return sprintf(page, "%u\n", jiffies_to_msecs(q->td->throtl_slice));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) ssize_t blk_throtl_sample_time_store(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 	const char *page, size_t count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 	unsigned long v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	unsigned long t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	if (!q->td)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 	if (kstrtoul(page, 10, &v))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 	t = msecs_to_jiffies(v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	if (t == 0 || t > MAX_THROTL_SLICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	q->td->throtl_slice = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 	return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 
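/*
 * Module init: create the kthrotld workqueue used for throttle-related
 * work (failure to create it is fatal) and register the throttling blkcg
 * policy.
 */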
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) static int __init throtl_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	if (!kthrotld_workqueue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 		panic("Failed to create kthrotld\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 	return blkcg_policy_register(&blkcg_policy_throtl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) module_init(throtl_init);