Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 *
 * For policy-specific per-blkcg data:
 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 *                    Arianna Avanzini <avanzini.arianna@gmail.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/blk-cgroup.h>
#include <linux/tracehook.h>
#include <linux/psi.h>
#include "blk.h"
#include "blk-ioprio.h"

#define MAX_KEY_LEN 100

/*
 * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
 * blkcg_pol_register_mutex nests outside of it and synchronizes entire
 * policy [un]register operations including cgroup file additions /
 * removals.  Putting cgroup file registration outside blkcg_pol_mutex
 * allows grabbing it from cgroup callbacks.
 */
static DEFINE_MUTEX(blkcg_pol_register_mutex);
static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);

struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
EXPORT_SYMBOL_GPL(blkcg_root_css);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */

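/*
 * blkcg_debug_stats enables extra debugging statistics in the cgroup I/O
 * stat output; blkcg_punt_bio_wq is the workqueue that services the
 * per-blkg async_bios lists (see blkg_async_bio_workfn() below).
 */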
bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;

static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
{
	return pol && test_bit(pol->plid, q->blkcg_pols);
}

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (blkg->pd[i])
			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);

	free_percpu(blkg->iostat_cpu);
	percpu_ref_exit(&blkg->refcnt);
	kfree(blkg);
}

static void __blkg_release(struct rcu_head *rcu)
{
	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);

	WARN_ON(!bio_list_empty(&blkg->async_bios));

	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);
	blkg_free(blkg);
}

/*
 * A group is RCU protected, but having an rcu lock does not mean that one
 * can access all the fields of blkg and assume these are valid.  For
 * example, don't try to follow throtl_data and request queue links.
 *
 * Having a reference to blkg under an rcu allows accesses to only values
 * local to groups like group stats and group rate limits.
 */
static void blkg_release(struct percpu_ref *ref)
{
	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);

	call_rcu(&blkg->rcu_head, __blkg_release);
}

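/*
 * Drain @blkg->async_bios and submit the bios.  A plug is started when at
 * least two bios are pending so that consecutive submissions can be merged.
 */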
static void blkg_async_bio_workfn(struct work_struct *work)
{
	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
					     async_bio_work);
	struct bio_list bios = BIO_EMPTY_LIST;
	struct bio *bio;
	struct blk_plug plug;
	bool need_plug = false;

	/* as long as there are pending bios, @blkg can't go away */
	spin_lock_bh(&blkg->async_bio_lock);
	bio_list_merge(&bios, &blkg->async_bios);
	bio_list_init(&blkg->async_bios);
	spin_unlock_bh(&blkg->async_bio_lock);

	/* start plug only when bio_list contains at least 2 bios */
	if (bios.head && bios.head->bi_next) {
		need_plug = true;
		blk_start_plug(&plug);
	}
	while ((bio = bio_list_pop(&bios)))
		submit_bio(bio);
	if (need_plug)
		blk_finish_plug(&plug);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i, cpu;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
		goto err_free;

	blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
	if (!blkg->iostat_cpu)
		goto err_free;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	spin_lock_init(&blkg->async_bio_lock);
	bio_list_init(&blkg->async_bios);
	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
	blkg->blkcg = blkcg;

	u64_stats_init(&blkg->iostat.sync);
	for_each_possible_cpu(cpu)
		u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;
	}

	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}

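/**
 * blkg_lookup_slowpath - look up a blkg when the lookup hint misses
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update the lookup hint with the result
 *
 * Look up the blkg for the @blkcg - @q pair in the radix tree.  If
 * @update_hint is %true, the caller must hold @q->queue_lock and the
 * hint is updated on success.
 */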
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint)
{
	struct blkcg_gq *blkg;

	/*
	 * Hint didn't match.  Look up from the radix tree.  Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree.  The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(&q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);

/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
 */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(&q->queue_lock);

	/* request_queue is dying, do not create/recreate a blkg */
	if (blk_queue_dying(q)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	/* blkg holds a reference to blkcg */
	if (!css_tryget_online(&blkcg->css)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_css;
		}
	}
	blkg = new_blkg;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			ret = -ENODEV;
			goto err_put_css;
		}
		blkg_get(blkg->parent);
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_init_fn)
			pol->pd_init_fn(blkg->pd[i]);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg->pd[i]);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret)
		return blkg;

	/* @blkg failed to be fully initialized, use the usual release path */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}

/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
 * create one.  blkg creation is performed recursively from blkcg_root such
 * that all non-root blkg's have access to the parent blkg.  This function
 * should be called under RCU read lock and takes @q->queue_lock.
 *
 * Returns the blkg or the closest blkg if blkg_create() fails as it walks
 * down from root.
 */
static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
		struct request_queue *q)
{
	struct blkcg_gq *blkg;
	unsigned long flags;

	WARN_ON_ONCE(!rcu_read_lock_held());

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	spin_lock_irqsave(&q->queue_lock, flags);
	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		goto found;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.  Returns the closest
	 * blkg to the intended blkg should blkg_create() fail.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);
		struct blkcg_gq *ret_blkg = q->root_blkg;

		while (parent) {
			blkg = __blkg_lookup(parent, q, false);
			if (blkg) {
				/* remember closest blkg */
				ret_blkg = blkg;
				break;
			}
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (IS_ERR(blkg)) {
			blkg = ret_blkg;
			break;
		}
		if (pos == blkcg)
			break;
	}

found:
	spin_unlock_irqrestore(&q->queue_lock, flags);
	return blkg;
}

static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	int i;

	lockdep_assert_held(&blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg->pd[i]);
	}

	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting lookup hint to and clearing it from @blkg are done
	 * under queue_lock.  If it's not pointing to @blkg now, it never
	 * will.  Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	percpu_ref_kill(&blkg->refcnt);
}

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.
 */
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	spin_lock_irq(&q->queue_lock);
	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	q->root_blkg = NULL;
	spin_unlock_irq(&q->queue_lock);
}

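/*
 * cftype write handler: zero the per-cpu and aggregated iostat counters of
 * every blkg belonging to @css and let each policy reset its own stats.
 */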
static int blkcg_reset_stats(struct cgroup_subsys_state *css,
			     struct cftype *cftype, u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	int i, cpu;

	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		for_each_possible_cpu(cpu) {
			struct blkg_iostat_set *bis =
				per_cpu_ptr(blkg->iostat_cpu, cpu);
			memset(bis, 0, sizeof(*bis));
		}
		memset(&blkg->iostat, 0, sizeof(blkg->iostat));

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg->pd[i]);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

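/* Return the device name backing @blkg's queue, or NULL if none is registered. */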
const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info->dev)
		return bdi_dev_name(blkg->q->backing_dev_info);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data and the matching queue lock held.  If @show_total
 * is %true, the sum of the return values from @prfill is printed with
 * "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total)
{
	struct blkcg_gq *blkg;
	u64 total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		spin_lock_irq(&blkg->q->queue_lock);
		if (blkcg_policy_enabled(blkg->q, pol))
			total += prfill(sf, blkg->pd[pol->plid], data);
		spin_unlock_irq(&blkg->q->queue_lock);
	}
	rcu_read_unlock();

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/* Performs queue bypass and policy enabled checks then looks up blkg. */
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
					  const struct blkcg_policy *pol,
					  struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(&q->queue_lock);

	if (!blkcg_policy_enabled(q, pol))
		return ERR_PTR(-EOPNOTSUPP);
	return __blkg_lookup(blkcg, q, true /* update_hint */);
}

/**
 * blkcg_conf_get_disk - parse and get disk from blkg config string
 * @inputp: input string pointer
 *
 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
 * from @input and get and return the matching gendisk.  *@inputp is
 * updated to point past the device node prefix.  Returns an ERR_PTR()
 * value on error.
 *
 * Use this function iff blkg_conf_prep() can't be used for some reason.
 */
struct gendisk *blkcg_conf_get_disk(char **inputp)
{
	char *input = *inputp;
	unsigned int major, minor;
	struct gendisk *disk;
	int key_len, part;

	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
		return ERR_PTR(-EINVAL);

	input += key_len;
	if (!isspace(*input))
		return ERR_PTR(-EINVAL);
	input = skip_spaces(input);

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk)
		return ERR_PTR(-ENODEV);
	if (part) {
		put_disk_and_module(disk);
		return ERR_PTR(-ENODEV);
	}

	*inputp = input;
	return disk;
}

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->body the
 * part of @input following MAJ:MIN.  This function returns with RCU read
 * lock and queue lock held and must be paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx)
	__acquires(rcu) __acquires(&disk->queue->queue_lock)
{
	struct gendisk *disk;
	struct request_queue *q;
	struct blkcg_gq *blkg;
	int ret;

	disk = blkcg_conf_get_disk(&input);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	q = disk->queue;

	rcu_read_lock();
	spin_lock_irq(&q->queue_lock);

	blkg = blkg_lookup_check(blkcg, pol, q);
	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto fail_unlock;
	}

	if (blkg)
		goto success;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent;
		struct blkcg_gq *new_blkg;

		parent = blkcg_parent(blkcg);
		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		/* Drop locks to do new blkg allocation with GFP_KERNEL. */
		spin_unlock_irq(&q->queue_lock);
		rcu_read_unlock();

		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto fail;
		}

		if (radix_tree_preload(GFP_KERNEL)) {
			blkg_free(new_blkg);
			ret = -ENOMEM;
			goto fail;
		}

		rcu_read_lock();
		spin_lock_irq(&q->queue_lock);

		blkg = blkg_lookup_check(pos, pol, q);
		if (IS_ERR(blkg)) {
			ret = PTR_ERR(blkg);
			blkg_free(new_blkg);
			goto fail_preloaded;
		}

		if (blkg) {
			blkg_free(new_blkg);
		} else {
			blkg = blkg_create(pos, q, new_blkg);
			if (IS_ERR(blkg)) {
				ret = PTR_ERR(blkg);
				goto fail_preloaded;
			}
		}

		radix_tree_preload_end();

		if (pos == blkcg)
			goto success;
	}
success:
	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->body = input;
	return 0;

fail_preloaded:
	radix_tree_preload_end();
fail_unlock:
	spin_unlock_irq(&q->queue_lock);
	rcu_read_unlock();
fail:
	put_disk_and_module(disk);
	/*
	 * If queue was bypassing, we should retry.  Do so after a
	 * short msleep().  It isn't strictly necessary but queue
	 * can be bypassing for some time and it's always nice to
	 * avoid busy looping.
	 */
	if (ret == -EBUSY) {
		msleep(10);
		ret = restart_syscall();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
{
	spin_unlock_irq(&ctx->disk->queue->queue_lock);
	rcu_read_unlock();
	put_disk_and_module(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);

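/* Helpers to copy, accumulate and subtract blkg_iostat byte/io counters. */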
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] = src->bytes[i];
		dst->ios[i] = src->ios[i];
	}
}

static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] += src->bytes[i];
		dst->ios[i] += src->ios[i];
	}
}

static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] -= src->bytes[i];
		dst->ios[i] -= src->ios[i];
	}
}

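/*
 * rstat flush callback: fold the per-cpu iostat delta for @cpu into each
 * blkg's aggregated counters and propagate the result to the parent blkg.
 */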
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;

	rcu_read_lock();

	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		struct blkcg_gq *parent = blkg->parent;
		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
		struct blkg_iostat cur, delta;
		unsigned int seq;

		/* fetch the current per-cpu values */
		do {
			seq = u64_stats_fetch_begin(&bisc->sync);
			blkg_iostat_set(&cur, &bisc->cur);
		} while (u64_stats_fetch_retry(&bisc->sync, seq));

		/* propagate percpu delta to global */
		u64_stats_update_begin(&blkg->iostat.sync);
		blkg_iostat_set(&delta, &cur);
		blkg_iostat_sub(&delta, &bisc->last);
		blkg_iostat_add(&blkg->iostat.cur, &delta);
		blkg_iostat_add(&bisc->last, &delta);
		u64_stats_update_end(&blkg->iostat.sync);

		/* propagate global delta to parent */
		if (parent) {
			u64_stats_update_begin(&parent->iostat.sync);
			blkg_iostat_set(&delta, &blkg->iostat.cur);
			blkg_iostat_sub(&delta, &blkg->iostat.last);
			blkg_iostat_add(&parent->iostat.cur, &delta);
			blkg_iostat_add(&blkg->iostat.last, &delta);
			u64_stats_update_end(&parent->iostat.sync);
		}
	}

	rcu_read_unlock();
}

/*
 * The rstat algorithms intentionally don't handle the root cgroup to avoid
 * incurring overhead when no cgroups are defined. For that reason,
 * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
 * iostat in the root cgroup's blkcg_gq.
 *
 * However, we would like to re-use the printing code between the root and
 * non-root cgroups to the extent possible. For that reason, we simulate
 * flushing the root cgroup's stats by explicitly filling in the iostat
 * with disk level statistics.
 */
static void blkcg_fill_root_iostats(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct hd_struct *part = disk_get_part(disk, 0);
		struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 		struct blkg_iostat tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 		int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		memset(&tmp, 0, sizeof(tmp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 			struct disk_stats *cpu_dkstats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 			cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 			tmp.ios[BLKG_IOSTAT_READ] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 				cpu_dkstats->ios[STAT_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 			tmp.ios[BLKG_IOSTAT_WRITE] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 				cpu_dkstats->ios[STAT_WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 			tmp.ios[BLKG_IOSTAT_DISCARD] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 				cpu_dkstats->ios[STAT_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 			// convert sectors to bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 			tmp.bytes[BLKG_IOSTAT_READ] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 				cpu_dkstats->sectors[STAT_READ] << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 			tmp.bytes[BLKG_IOSTAT_WRITE] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 				cpu_dkstats->sectors[STAT_WRITE] << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 			tmp.bytes[BLKG_IOSTAT_DISCARD] +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 				cpu_dkstats->sectors[STAT_DISCARD] << 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 			u64_stats_update_begin(&blkg->iostat.sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 			blkg_iostat_set(&blkg->iostat.cur, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			u64_stats_update_end(&blkg->iostat.sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		disk_put_part(part);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) }
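
/*
 * Note on the "<< 9" above: disk_stats accounts in 512-byte sectors, so
 * the shift converts sectors to bytes (e.g. 2048 sectors << 9 == 1048576
 * bytes == 1 MiB).  The totals land in the root blkg's iostat.cur so that
 * blkcg_print_stat() below can print the root cgroup through the same
 * code path as its children.
 */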
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) static int blkcg_print_stat(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	if (!seq_css(sf)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		blkcg_fill_root_iostats();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 		cgroup_rstat_flush(blkcg->css.cgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		struct blkg_iostat_set *bis = &blkg->iostat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		const char *dname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		char *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		u64 rbytes, wbytes, rios, wios, dbytes, dios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 		size_t size = seq_get_buf(sf, &buf), off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		bool has_stats = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 		unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		spin_lock_irq(&blkg->q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		if (!blkg->online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		dname = blkg_dev_name(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		if (!dname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		 * scnprintf() returns the count of characters written, NOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		 * including the trailing \0, so the buffer has effectively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		 * shrunk by count+1; the next write should start on top of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		 * that \0, though, so only count is added to the offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		 */
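		/*
		 * Worked example with hypothetical values: if size is 64 and
		 * dname is "sda", scnprintf() stores "sda " plus a '\0'
		 * (5 bytes) and returns 4, so off becomes 4 and the next
		 * write starts on top of that '\0' with 60 bytes of room.
		 */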
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		off += scnprintf(buf+off, size-off, "%s ", dname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 			seq = u64_stats_fetch_begin(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 			rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 			wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 			dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 			rios = bis->cur.ios[BLKG_IOSTAT_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 			wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 			dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		} while (u64_stats_fetch_retry(&bis->sync, seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		if (rbytes || wbytes || rios || wios) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 			has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 			off += scnprintf(buf+off, size-off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 					 rbytes, wbytes, rios, wios,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 					 dbytes, dios);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 			has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 			off += scnprintf(buf+off, size-off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 					 " use_delay=%d delay_nsec=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 					 atomic_read(&blkg->use_delay),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 					(unsigned long long)atomic64_read(&blkg->delay_nsec));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 			struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 			size_t written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 			if (!blkg->pd[i] || !pol->pd_stat_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 			written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 			if (written)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 				has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 			off += written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 		if (has_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 			if (off < size - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 				off += scnprintf(buf+off, size-off, "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 				seq_commit(sf, off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 				seq_commit(sf, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		spin_unlock_irq(&blkg->q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) static struct cftype blkcg_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		.name = "stat",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		.seq_show = blkcg_print_stat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) static struct cftype blkcg_legacy_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		.name = "reset_stats",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		.write_u64 = blkcg_reset_stats,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969)  * blkcg destruction is a three-stage process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971)  * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972)  *    which offlines writeback.  Here we tie the next stage of blkg destruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973)  *    to the completion of writeback associated with the blkcg.  This lets us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974)  *    avoid punting potentially large amounts of outstanding writeback to root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975)  *    while maintaining any ongoing policies.  The next stage is triggered when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976)  *    the nr_cgwbs count goes to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978)  * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979)  *    and handles the destruction of blkgs.  Here the css reference held by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980)  *    the blkg is put back eventually allowing blkcg_css_free() to be called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981)  *    This work may occur in cgwb_release_workfn() on the cgwb_release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982)  *    workqueue.  Any submitted ios that fail to get the blkg ref will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983)  *    punted to the root_blkg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985)  * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986)  *    This finally frees the blkcg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987)  */
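/*
 * The same life cycle as a compact sketch (not verbatim code):
 *
 *	blkcg_css_offline()
 *		wb_blkcg_offline(blkcg) + blkcg_unpin_online(blkcg)
 *	... the last online pin, held while cgwbs are active, goes away ...
 *	blkcg_destroy_blkgs()
 *		blkg_destroy() each blkg, putting the css refs they hold
 *	blkcg_css_free()
 *		free the per-policy cpd's, then kfree(blkcg)
 */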
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990)  * blkcg_css_offline - cgroup css_offline callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991)  * @css: css of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993)  * This function is called when @css is about to go away.  Here the cgwbs are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994)  * offlined first and only once writeback associated with the blkcg has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995)  * finished do we start step 2 (see above).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) static void blkcg_css_offline(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	/* this prevents anyone from attaching or migrating to this blkcg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	wb_blkcg_offline(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	/* put the base online pin allowing step 2 to be triggered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	blkcg_unpin_online(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)  * blkcg_destroy_blkgs - responsible for shooting down blkgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)  * @blkcg: blkcg of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)  * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)  * is nested inside q lock, this function performs reverse double lock dancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)  * Destroying the blkgs releases the reference held on the blkcg's css allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)  * blkcg_css_free to eventually be called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)  * This is the blkcg counterpart of ioc_release_fn().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) void blkcg_destroy_blkgs(struct blkcg *blkcg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	spin_lock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	while (!hlist_empty(&blkcg->blkg_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 						struct blkcg_gq, blkcg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 		struct request_queue *q = blkg->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 		if (need_resched() || !spin_trylock(&q->queue_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 			 * Given that the system can accumulate a huge number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 			 * of blkgs in pathological cases, check whether we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 			 * need to reschedule to avoid a softlockup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 			spin_unlock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 			spin_lock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		blkg_destroy(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		spin_unlock(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	spin_unlock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) static void blkcg_css_free(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	list_del(&blkcg->all_blkcgs_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	for (i = 0; i < BLKCG_MAX_POLS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		if (blkcg->cpd[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	kfree(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) static struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	struct cgroup_subsys_state *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	if (!parent_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		blkcg = &blkcg_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 		blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		if (!blkcg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 			ret = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 			goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	for (i = 0; i < BLKCG_MAX_POLS ; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 		struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		struct blkcg_policy_data *cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 		 * If the policy hasn't been registered yet, skip it; its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 		 * cpd will be allocated at registration time. Otherwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 		 * check if the policy requires any specific per-cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		 * data: if it does, allocate and initialize it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 		if (!pol || !pol->cpd_alloc_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 		cpd = pol->cpd_alloc_fn(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 		if (!cpd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 			ret = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 			goto free_pd_blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		blkcg->cpd[i] = cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 		cpd->blkcg = blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 		cpd->plid = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 		if (pol->cpd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 			pol->cpd_init_fn(cpd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	spin_lock_init(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	refcount_set(&blkcg->online_pin, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	INIT_HLIST_HEAD(&blkcg->blkg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) #ifdef CONFIG_CGROUP_WRITEBACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	INIT_LIST_HEAD(&blkcg->cgwb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	return &blkcg->css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) free_pd_blkcg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	for (i--; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 		if (blkcg->cpd[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	if (blkcg != &blkcg_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		kfree(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) static int blkcg_css_online(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	struct blkcg *parent = blkcg_parent(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	 * blkcg_pin_online() is used to delay blkcg offline so that blkgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	 * don't go offline while cgwbs are still active on them.  Pin the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	 * parent so that offline always happens towards the root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	if (parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		blkcg_pin_online(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
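
/*
 * The pin taken in blkcg_css_online() pairs with the blkcg_unpin_online()
 * call in blkcg_css_offline(): each child keeps its parent pinned online,
 * so offlining always proceeds from the leaves towards the root, as the
 * comment above describes.
 */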
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)  * blkcg_init_queue - initialize blkcg part of request queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)  * @q: request_queue to initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)  * Called from blk_alloc_queue(). Responsible for initializing blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)  * part of new request_queue @q.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)  * RETURNS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  * 0 on success, -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) int blkcg_init_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	struct blkcg_gq *new_blkg, *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	bool preloaded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	if (!new_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	preloaded = !radix_tree_preload(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	/* Make sure the root blkg exists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	blkg = blkg_create(&blkcg_root, q, new_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	if (IS_ERR(blkg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 		goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	q->root_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	if (preloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 		radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	ret = blk_ioprio_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	ret = blk_throtl_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	ret = blk_iolatency_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		blk_throtl_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 		goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) err_destroy_all:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	blkg_destroy_all(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) err_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	if (preloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	return PTR_ERR(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)  * blkcg_exit_queue - exit and release blkcg part of request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)  * @q: request_queue being released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)  * Called from blk_exit_queue().  Responsible for exiting blkcg part.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) void blkcg_exit_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	blkg_destroy_all(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	blk_throtl_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)  * We cannot support shared io contexts, as we have no means to support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)  * two tasks with the same ioc in two different groups without major rework
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)  * of the main cic data structures.  For now we allow a task to change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)  * its cgroup only if it's the only owner of its ioc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)  */
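/*
 * For reference: an io_context ends up shared between tasks e.g. when a
 * task is cloned with CLONE_IO, which is what the nr_tasks > 1 check
 * below detects.
 */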
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) static int blkcg_can_attach(struct cgroup_taskset *tset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	struct cgroup_subsys_state *dst_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	struct io_context *ioc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	/* task_lock() is needed to avoid races with exit_io_context() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	cgroup_taskset_for_each(task, dst_css, tset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 		task_lock(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		ioc = task->io_context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 		task_unlock(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) static void blkcg_bind(struct cgroup_subsys_state *root_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 		struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		if (!pol || !pol->cpd_bind_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 			if (blkcg->cpd[pol->plid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 				pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) static void blkcg_exit(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	if (tsk->throttle_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		blk_put_queue(tsk->throttle_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	tsk->throttle_queue = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) struct cgroup_subsys io_cgrp_subsys = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	.css_alloc = blkcg_css_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	.css_online = blkcg_css_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	.css_offline = blkcg_css_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	.css_free = blkcg_css_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	.can_attach = blkcg_can_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	.css_rstat_flush = blkcg_rstat_flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	.bind = blkcg_bind,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	.dfl_cftypes = blkcg_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	.legacy_cftypes = blkcg_legacy_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	.legacy_name = "blkio",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	.exit = blkcg_exit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) #ifdef CONFIG_MEMCG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	 * This ensures that, if available, memcg is automatically enabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	 * together on the default hierarchy so that the owner cgroup can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	 * be retrieved from writeback pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	.depends_on = 1 << memory_cgrp_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) EXPORT_SYMBOL_GPL(io_cgrp_subsys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)  * blkcg_activate_policy - activate a blkcg policy on a request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)  * @q: request_queue of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)  * @pol: blkcg policy to activate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)  * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)  * bypass mode to populate its blkgs with policy_data for @pol.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)  * Activation happens with @q bypassed, so nobody would be accessing blkgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)  * from IO path.  Update of each blkg is protected by both queue and blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)  * locks so that holding either lock and testing blkcg_policy_enabled() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)  * always enough for dereferencing policy data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)  * The caller is responsible for synchronizing [de]activations and policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)  * [un]registerations.  Returns 0 on success, -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) int blkcg_activate_policy(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 			  const struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	struct blkg_policy_data *pd_prealloc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	struct blkcg_gq *blkg, *pinned_blkg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	if (blkcg_policy_enabled(q, pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 		blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 		struct blkg_policy_data *pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 		if (blkg->pd[pol->plid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 		if (blkg == pinned_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 			pd = pd_prealloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 			pd_prealloc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 					      blkg->blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 		if (!pd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 			 * GFP_NOWAIT failed.  Free the existing one and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 			 * prealloc for @blkg w/ GFP_KERNEL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 			if (pinned_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 				blkg_put(pinned_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 			blkg_get(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 			pinned_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 			spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 			if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 				pol->pd_free_fn(pd_prealloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 						       blkg->blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 			if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 				goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 				goto enomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 		blkg->pd[pol->plid] = pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		pd->blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 		pd->plid = pol->plid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	/* all allocated, init in the same order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	if (pol->pd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 			pol->pd_init_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	__set_bit(pol->plid, q->blkcg_pols);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	if (pinned_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		blkg_put(pinned_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		pol->pd_free_fn(pd_prealloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) enomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	/* alloc failed, nothing's initialized yet, free everything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 		struct blkcg *blkcg = blkg->blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		spin_lock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 		if (blkg->pd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 			pol->pd_free_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 			blkg->pd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		spin_unlock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) EXPORT_SYMBOL_GPL(blkcg_activate_policy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)  * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)  * @q: request_queue of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414)  * @pol: blkcg policy to deactivate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)  * Deactivate @pol on @q.  Follows the same synchronization rules as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)  * blkcg_activate_policy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) void blkcg_deactivate_policy(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 			     const struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	if (!blkcg_policy_enabled(q, pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 	if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	__clear_bit(pol->plid, q->blkcg_pols);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		struct blkcg *blkcg = blkg->blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		spin_lock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		if (blkg->pd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 			if (pol->pd_offline_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 				pol->pd_offline_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 			pol->pd_free_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 			blkg->pd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 		spin_unlock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
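
/*
 * Typical usage, sketched with a hypothetical policy "foo" (in-tree users
 * such as blk-throttle follow the same pattern from their per-queue
 * init/exit hooks):
 *
 *	int blk_foo_init_queue(struct request_queue *q)
 *	{
 *		return blkcg_activate_policy(q, &blkcg_policy_foo);
 *	}
 *
 *	void blk_foo_exit_queue(struct request_queue *q)
 *	{
 *		blkcg_deactivate_policy(q, &blkcg_policy_foo);
 *	}
 */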
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)  * blkcg_policy_register - register a blkcg policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)  * @pol: blkcg policy to register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)  * Register @pol with blkcg core.  Might sleep and @pol may be modified on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459)  * successful registration.  Returns 0 on success and -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int blkcg_policy_register(struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	mutex_lock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	/* find an empty slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	for (i = 0; i < BLKCG_MAX_POLS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 		if (!blkcg_policy[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	if (i >= BLKCG_MAX_POLS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 		goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	/* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn come in pairs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 		(!pol->pd_alloc_fn ^ !pol->pd_free_fn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 		goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	/* register @pol */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	pol->plid = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	blkcg_policy[pol->plid] = pol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	/* allocate and install cpd's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	if (pol->cpd_alloc_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 			struct blkcg_policy_data *cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 			cpd = pol->cpd_alloc_fn(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 			if (!cpd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 				goto err_free_cpds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 			blkcg->cpd[pol->plid] = cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 			cpd->blkcg = blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 			cpd->plid = pol->plid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 			if (pol->cpd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 				pol->cpd_init_fn(cpd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	/* everything is in place, add intf files for the new policy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	if (pol->dfl_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 					       pol->dfl_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	if (pol->legacy_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 		WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 						  pol->legacy_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) err_free_cpds:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	if (pol->cpd_free_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 			if (blkcg->cpd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 				blkcg->cpd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	blkcg_policy[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) err_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) EXPORT_SYMBOL_GPL(blkcg_policy_register);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)  * blkcg_policy_unregister - unregister a blkcg policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)  * @pol: blkcg policy to unregister
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)  * Undo blkcg_policy_register(@pol).  Might sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) void blkcg_policy_unregister(struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 	struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	mutex_lock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	if (WARN_ON(blkcg_policy[pol->plid] != pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	/* kill the intf files first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 	if (pol->dfl_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		cgroup_rm_cftypes(pol->dfl_cftypes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	if (pol->legacy_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		cgroup_rm_cftypes(pol->legacy_cftypes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	/* remove cpds and unregister */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	if (pol->cpd_free_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 			if (blkcg->cpd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 				blkcg->cpd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 	blkcg_policy[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) bool __blkcg_punt_bio_submit(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 	struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	/* consume the flag first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	bio->bi_opf &= ~REQ_CGROUP_PUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	/* never bounce for the root cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	if (!blkg->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	spin_lock_bh(&blkg->async_bio_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	bio_list_add(&blkg->async_bios, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	spin_unlock_bh(&blkg->async_bio_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)  * Scale the accumulated delay based on how long it has been since we updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)  * the delay.  We only call this when we are adding delay, in case it has been a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)  * while since we last added delay, and when we are checking whether we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)  * delay a task, to account for any delays that may have occurred in the interim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	u64 old = atomic64_read(&blkg->delay_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	/* negative use_delay means no scaling, see blkcg_set_delay() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	if (atomic_read(&blkg->use_delay) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	 * We only want to scale down every second.  The idea here is that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	 * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	 * time window.  We only want to throttle tasks for recent delay that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	 * has occurred, in 1 second time windows, since that is the maximum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	 * time things can be throttled for.  We save the current delay window in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	 * blkg->last_delay so we know what amount is still left to be charged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	 * to the blkg from this point onward.  blkg->last_use keeps track of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	 * the use_delay counter.  The idea is that if we're unthrottling the blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	 * we are ok with whatever is happening now, and we can take away more of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	 * the accumulated delay as we've already throttled enough that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	 * everybody is happy with their IO latencies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	if (time_before64(old + NSEC_PER_SEC, now) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	    atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		u64 cur = atomic64_read(&blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 		u64 sub = min_t(u64, blkg->last_delay, now - old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		int cur_use = atomic_read(&blkg->use_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 		 * We've been unthrottled, subtract a larger chunk of our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 		 * accumulated delay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 		if (cur_use < blkg->last_use)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 			sub = max_t(u64, sub, blkg->last_delay >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 		 * This shouldn't happen, but handle it anyway.  Our delay_nsec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 		 * should only ever be growing except here where we subtract out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 		 * min(last_delay, 1 second), but lord knows bugs happen and I'd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		 * rather not end up with negative numbers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 		if (unlikely(cur < sub)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 			atomic64_set(&blkg->delay_nsec, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 			blkg->last_delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 			atomic64_sub(sub, &blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 			blkg->last_delay = cur - sub;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 		blkg->last_use = cur_use;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
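
/*
 * Editor's illustrative sketch (not part of the original file): the window
 * scaling above reduces to the arithmetic below.  The helper and its name are
 * hypothetical and for exposition only.  For example, with delay_nsec = 1.2s,
 * last_delay = 0.8s, 1.5s elapsed and use_delay unchanged, 0.8s is subtracted
 * and 0.4s of accumulated delay remains.
 */
static inline u64 example_scaled_delay(u64 delay_nsec, u64 last_delay,
					u64 elapsed_ns, bool unthrottled)
{
	/* never subtract more than the last window or the elapsed time */
	u64 sub = min_t(u64, last_delay, elapsed_ns);

	/* if use_delay dropped, reclaim at least half of the last window */
	if (unthrottled)
		sub = max_t(u64, sub, last_delay >> 1);

	return delay_nsec > sub ? delay_nsec - sub : 0;
}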
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)  * This is called when we want to actually walk up the hierarchy and check to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652)  * see if we need to throttle, and then actually throttle if there is some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653)  * accumulated delay.  This should only be called upon return to user space so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)  * we're not holding some lock that would induce a priority inversion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	bool clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	u64 now = ktime_to_ns(ktime_get());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	u64 exp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 	u64 delay_nsec = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	int tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	while (blkg->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 		int use_delay = atomic_read(&blkg->use_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 		if (use_delay) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 			u64 this_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 			blkcg_scale_delay(blkg, now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 			this_delay = atomic64_read(&blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 			if (this_delay > delay_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 				delay_nsec = this_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 				clamp = use_delay > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 		blkg = blkg->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	if (!delay_nsec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	 * Let's not sleep for all eternity if we've amassed a huge delay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	 * Swapping or metadata IO can accumulate tens of seconds worth of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	 * delay, and we want userspace to be able to do _something_, so cap the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	 * delays at 0.25s.  If there are tens of seconds worth of delay then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	 * tasks will be delayed by 0.25 seconds for every syscall. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	 * blkcg_set_delay() was used as indicated by negative use_delay, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	 * caller is responsible for regulating the range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 	if (clamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 		delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 		psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	exp = ktime_add_ns(now, delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	tok = io_schedule_prepare();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 		__set_current_state(TASK_KILLABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 		if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	} while (!fatal_signal_pending(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	io_schedule_finish(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 		psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)  * blkcg_maybe_throttle_current - throttle the current task if it has been marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715)  * This is only called if we've been marked with set_notify_resume().  Obviously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)  * we can be set_notify_resume() for reasons other than blkcg throttling, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)  * check to see if current->throttle_queue is set, and if not this doesn't do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718)  * anything.  This should only ever be called by the resume code; it's not meant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)  * to be called by people willy-nilly as it will actually do the work to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)  * throttle the task if it is set up for throttling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) void blkcg_maybe_throttle_current(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	struct request_queue *q = current->throttle_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 	bool use_memdelay = current->use_memdelay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	current->throttle_queue = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	current->use_memdelay = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	css = kthread_blkcg();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	if (css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 		blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 		blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 	if (!blkcg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	blkg = blkg_lookup(blkcg, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	if (!blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	if (!blkg_tryget(blkg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	blkg_put(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	blk_put_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	blk_put_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762)  * blkcg_schedule_throttle - this task needs to check for throttling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)  * @q: the request queue IO was submitted on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)  * @use_memdelay: do we charge this to memory delay for PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)  * This is called by the IO controller when we know there's delay accumulated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)  * for the blkg for this task.  We do not pass the blkg because there are places
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)  * we call this that may not have that information; the swapping code, for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)  * instance, will only have a request_queue at that point.  This sets the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)  * notify_resume for the task to check and see if it requires throttling before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)  * returning to user space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)  * We will only schedule once per syscall.  You can call this over and over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774)  * again and it will only do the check once upon return to user space, and only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)  * throttle once.  If the task needs to be throttled again it'll need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776)  * re-set at the next time we see the task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	if (unlikely(current->flags & PF_KTHREAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	if (!blk_get_queue(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	if (current->throttle_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 		blk_put_queue(current->throttle_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 	current->throttle_queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 		current->use_memdelay = use_memdelay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 	set_notify_resume(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) }
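
/*
 * Editor's illustrative sketch (not part of the original file): a caller that
 * only has the request_queue, as described above for the swapping code, simply
 * arms the resume-time check.  The function name is hypothetical.
 */
static inline void example_note_reclaim_stall(struct request_queue *q)
{
	/* charge the eventual stall to PSI memory pressure as well */
	blkcg_schedule_throttle(q, true);
}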
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)  * blkcg_add_delay - add delay to this blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)  * @blkg: blkg of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)  * @now: the current time in nanoseconds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)  * @delta: how many nanoseconds of delay to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)  * Charge @delta to the blkg's current delay accumulation.  This is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)  * throttle tasks if an IO controller thinks we need more throttling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	blkcg_scale_delay(blkg, now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	atomic64_add(delta, &blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) }
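
/*
 * Editor's illustrative sketch (not part of the original file): a policy that
 * decides a group owes overage_ns of delay would typically charge it to the
 * blkg and then make sure the current task re-checks on its way back to user
 * space.  The helper and its name are hypothetical.
 */
static inline void example_penalize_group(struct blkcg_gq *blkg,
					  struct request_queue *q,
					  u64 overage_ns)
{
	blkcg_add_delay(blkg, ktime_to_ns(ktime_get()), overage_ns);
	blkcg_schedule_throttle(q, false);
}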
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)  * blkg_tryget_closest - try to get a blkg ref on the closest blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813)  * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814)  * @css: target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)  * As the failure mode here is to walk up the blkg tree, this ensures that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)  * blkg->parent pointers are always valid.  This returns the blkg that it ended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)  * up taking a reference on, or %NULL if no reference was taken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 		struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	struct blkcg_gq *blkg, *ret_blkg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 	while (blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 		if (blkg_tryget(blkg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 			ret_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 		blkg = blkg->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	return ret_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)  * bio_associate_blkg_from_css - associate a bio with a specified css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)  * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)  * @css: target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)  * Associate @bio with the blkg found from @css and the request_queue of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)  * @bio.  An association failure is handled by walking up the blkg tree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)  * Therefore, the blkg associated can be anything between the looked-up blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)  * and q->root_blkg.  This situation only happens when a cgroup is dying, in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)  * which case the remaining bios spill to the closest alive blkg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850)  * A reference will be taken on the blkg and will be released when @bio is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851)  * freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) void bio_associate_blkg_from_css(struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 				 struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	if (bio->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		blkg_put(bio->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	if (css && css->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 		bio->bi_blkg = blkg_tryget_closest(bio, css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		blkg_get(bio->bi_disk->queue->root_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		bio->bi_blkg = bio->bi_disk->queue->root_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
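
/*
 * Editor's illustrative sketch (not part of the original file): code issuing
 * IO on behalf of another cgroup (writeback being the classic case) can pin
 * the desired blkcg css and associate the bio explicitly before submission; a
 * NULL or root css falls back to the queue's root blkg as shown above.  The
 * helper name is hypothetical.
 */
static inline void example_submit_on_behalf(struct bio *bio,
					    struct cgroup_subsys_state *blkcg_css)
{
	bio_associate_blkg_from_css(bio, blkcg_css);
	submit_bio(bio);
}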
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869)  * bio_associate_blkg - associate a bio with a blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)  * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872)  * Associate @bio with the blkg found from the bio's css and request_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873)  * If one is not found, blkg_lookup_create() creates the blkg.  If a blkg is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874)  * already associated, the css is reused and association redone as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875)  * request_queue may have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) void bio_associate_blkg(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	if (bio->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 		css = &bio_blkcg(bio)->css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	bio_associate_blkg_from_css(bio, css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) EXPORT_SYMBOL_GPL(bio_associate_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)  * bio_clone_blkg_association - clone blkg association from src to dst bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)  * @dst: destination bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)  * @src: source bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) void bio_clone_blkg_association(struct bio *dst, struct bio *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	if (src->bi_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		if (dst->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 			blkg_put(dst->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 		blkg_get(src->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 		dst->bi_blkg = src->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static int blk_cgroup_io_type(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	if (op_is_discard(bio->bi_opf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		return BLKG_IOSTAT_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 	if (op_is_write(bio->bi_opf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 		return BLKG_IOSTAT_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 	return BLKG_IOSTAT_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) void blk_cgroup_bio_start(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	int rwd = blk_cgroup_io_type(bio), cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	struct blkg_iostat_set *bis;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	cpu = get_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 	bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 	u64_stats_update_begin(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 	 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 	 * bio and we would have already accounted for the size of the bio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		bio_set_flag(bio, BIO_CGROUP_ACCT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 		bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	bis->cur.ios[rwd]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	u64_stats_update_end(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	if (cgroup_subsys_on_dfl(io_cgrp_subsys))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 		cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) static int __init blkcg_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 					    WQ_MEM_RECLAIM | WQ_FREEZABLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 					    WQ_UNBOUND | WQ_SYSFS, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 	if (!blkcg_punt_bio_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) subsys_initcall(blkcg_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) module_param(blkcg_debug_stats, bool, 0644);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");