// SPDX-License-Identifier: GPL-2.0
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 *
 * For policy-specific per-blkcg data:
 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 *                    Arianna Avanzini <avanzini.arianna@gmail.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/blk-cgroup.h>
#include <linux/tracehook.h>
#include <linux/psi.h>
#include "blk.h"
#include "blk-ioprio.h"

#define MAX_KEY_LEN 100

/*
 * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
 * blkcg_pol_register_mutex nests outside of it and synchronizes entire
 * policy [un]register operations including cgroup file additions /
 * removals. Putting cgroup file registration outside blkcg_pol_mutex
 * allows grabbing it from cgroup callbacks.
 */
static DEFINE_MUTEX(blkcg_pol_register_mutex);
static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);

struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
EXPORT_SYMBOL_GPL(blkcg_root_css);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

static LIST_HEAD(all_blkcgs);	/* protected by blkcg_pol_mutex */

bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;

static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
{
	return pol && test_bit(pol->plid, q->blkcg_pols);
}

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (blkg->pd[i])
			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);

	free_percpu(blkg->iostat_cpu);
	percpu_ref_exit(&blkg->refcnt);
	kfree(blkg);
}

static void __blkg_release(struct rcu_head *rcu)
{
	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);

	WARN_ON(!bio_list_empty(&blkg->async_bios));

	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);
	blkg_free(blkg);
}

/*
 * A blkg is RCU protected, but holding an RCU read lock does not mean that
 * all of its fields can be accessed and assumed valid. For example, don't
 * try to follow throtl_data and request queue links.
 *
 * Having a reference to a blkg under an RCU read lock only allows access to
 * values local to the group, such as group stats and group rate limits.
 */
static void blkg_release(struct percpu_ref *ref)
{
	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);

	call_rcu(&blkg->rcu_head, __blkg_release);
}
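
/*
 * Summary of the release path implemented above: blkg_destroy() kills
 * blkg->refcnt and, once the last reference is put, blkg_release() runs
 * and defers the real teardown to __blkg_release() via call_rcu() so that
 * an RCU grace period passes before the blkcg css reference and the parent
 * blkg reference are dropped and blkg_free() tears the blkg down.
 */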

static void blkg_async_bio_workfn(struct work_struct *work)
{
	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
					     async_bio_work);
	struct bio_list bios = BIO_EMPTY_LIST;
	struct bio *bio;
	struct blk_plug plug;
	bool need_plug = false;

	/* as long as there are pending bios, @blkg can't go away */
	spin_lock_bh(&blkg->async_bio_lock);
	bio_list_merge(&bios, &blkg->async_bios);
	bio_list_init(&blkg->async_bios);
	spin_unlock_bh(&blkg->async_bio_lock);

	/* start plug only when bio_list contains at least 2 bios */
	if (bios.head && bios.head->bi_next) {
		need_plug = true;
		blk_start_plug(&plug);
	}
	while ((bio = bio_list_pop(&bios)))
		submit_bio(bio);
	if (need_plug)
		blk_finish_plug(&plug);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i, cpu;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
		goto err_free;

	blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
	if (!blkg->iostat_cpu)
		goto err_free;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	spin_lock_init(&blkg->async_bio_lock);
	bio_list_init(&blkg->async_bios);
	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
	blkg->blkcg = blkcg;

	u64_stats_init(&blkg->iostat.sync);
	for_each_possible_cpu(cpu)
		u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;
	}

	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint)
{
	struct blkcg_gq *blkg;

	/*
	 * Hint didn't match. Look up from the radix tree. Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree. The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(&q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
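
/*
 * Note on the fast path (an assumption based on the lookup helpers in the
 * blk-cgroup header, which are not part of this file): blkg_lookup() is
 * expected to check blkcg->blkg_hint first and only fall back to this
 * radix tree walk on a hint miss, which is why a successful slowpath
 * lookup refreshes the hint when the caller holds the queue lock.
 */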

/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_NOWAIT. @new_blkg is always consumed on return.
 */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(&q->queue_lock);

	/* request_queue is dying, do not create/recreate a blkg */
	if (blk_queue_dying(q)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	/* blkg holds a reference to blkcg */
	if (!css_tryget_online(&blkcg->css)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_css;
		}
	}
	blkg = new_blkg;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			ret = -ENODEV;
			goto err_put_css;
		}
		blkg_get(blkg->parent);
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_init_fn)
			pol->pd_init_fn(blkg->pd[i]);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg->pd[i]);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret)
		return blkg;

	/* @blkg failed to be fully initialized, use the usual release path */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}

/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to
 * create one. blkg creation is performed recursively from blkcg_root such
 * that all non-root blkg's have access to the parent blkg. This function
 * should be called under RCU read lock and takes @q->queue_lock.
 *
 * Returns the blkg or the closest blkg if blkg_create() fails as it walks
 * down from root.
 */
static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
					   struct request_queue *q)
{
	struct blkcg_gq *blkg;
	unsigned long flags;

	WARN_ON_ONCE(!rcu_read_lock_held());

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	spin_lock_irqsave(&q->queue_lock, flags);
	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		goto found;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents. Returns the closest
	 * blkg to the intended blkg should blkg_create() fail.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);
		struct blkcg_gq *ret_blkg = q->root_blkg;

		while (parent) {
			blkg = __blkg_lookup(parent, q, false);
			if (blkg) {
				/* remember closest blkg */
				ret_blkg = blkg;
				break;
			}
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (IS_ERR(blkg)) {
			blkg = ret_blkg;
			break;
		}
		if (pos == blkcg)
			break;
	}

found:
	spin_unlock_irqrestore(&q->queue_lock, flags);
	return blkg;
}
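
/*
 * Worked example of the creation loop above: with cgroups root/a/b and only
 * the root blkg instantiated for @q, a lookup from b's blkcg finds no blkg
 * for a, walks up to the root blkg as the closest ancestor, and creates a's
 * blkg first (pos == a); the next iteration finds a's blkg as the parent
 * and creates b's blkg, at which point pos == blkcg and the loop ends.
 */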

static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	int i;

	lockdep_assert_held(&blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something is wrong if we are trying to remove the same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg->pd[i]);
	}

	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting the lookup hint to and clearing it from @blkg are done
	 * under queue_lock. If it's not pointing to @blkg now, it never
	 * will. Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, the group can be destroyed.
	 */
	percpu_ref_kill(&blkg->refcnt);
}

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.
 */
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	spin_lock_irq(&q->queue_lock);
	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	q->root_blkg = NULL;
	spin_unlock_irq(&q->queue_lock);
}

static int blkcg_reset_stats(struct cgroup_subsys_state *css,
			     struct cftype *cftype, u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	int i, cpu;

	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates. This is a debug feature which shouldn't exist
	 * anyway. If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		for_each_possible_cpu(cpu) {
			struct blkg_iostat_set *bis =
				per_cpu_ptr(blkg->iostat_cpu, cpu);
			memset(bis, 0, sizeof(*bis));
		}
		memset(&blkg->iostat, 0, sizeof(blkg->iostat));

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg->pd[i]);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info->dev)
		return bdi_dev_name(blkg->q->backing_dev_info);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print the sum of the @prfill return values
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists. @prfill is invoked with @sf, the
 * policy data and @data and the matching queue lock held. If @show_total
 * is %true, the sum of the return values from @prfill is printed with a
 * "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total)
{
	struct blkcg_gq *blkg;
	u64 total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		spin_lock_irq(&blkg->q->queue_lock);
		if (blkcg_policy_enabled(blkg->q, pol))
			total += prfill(sf, blkg->pd[pol->plid], data);
		spin_unlock_irq(&blkg->q->queue_lock);
	}
	rcu_read_unlock();

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
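
/*
 * Illustrative sketch (not code used by this file) of how a policy might
 * drive blkcg_print_blkgs() from a cftype seq_show callback; the names
 * prefixed with "example_" are hypothetical, the helpers are the real ones
 * defined here:
 *
 *	static u64 example_prfill(struct seq_file *sf,
 *				  struct blkg_policy_data *pd, int off)
 *	{
 *		return __blkg_prfill_u64(sf, pd, example_pd_to_limit(pd));
 *	}
 *
 *	static int example_print_limit(struct seq_file *sf, void *v)
 *	{
 *		blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
 *				  example_prfill, &example_policy, 0, false);
 *		return 0;
 *	}
 *
 * Each output line is "MAJ:MIN <value>", e.g. "8:0 1048576".
 */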

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/* Performs queue bypass and policy enabled checks then looks up blkg. */
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
					  const struct blkcg_policy *pol,
					  struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(&q->queue_lock);

	if (!blkcg_policy_enabled(q, pol))
		return ERR_PTR(-EOPNOTSUPP);
	return __blkg_lookup(blkcg, q, true /* update_hint */);
}

/**
 * blkcg_conf_get_disk - parse MAJ:MIN and get the matching gendisk
 * @inputp: input string pointer
 *
 * Parse the device node prefix part, MAJ:MIN, of a per-blkg config update
 * from @input and get and return the matching gendisk. *@inputp is
 * updated to point past the device node prefix. Returns an ERR_PTR()
 * value on error.
 *
 * Use this function only if blkg_conf_prep() can't be used for some reason.
 */
struct gendisk *blkcg_conf_get_disk(char **inputp)
{
	char *input = *inputp;
	unsigned int major, minor;
	struct gendisk *disk;
	int key_len, part;

	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
		return ERR_PTR(-EINVAL);

	input += key_len;
	if (!isspace(*input))
		return ERR_PTR(-EINVAL);
	input = skip_spaces(input);

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk)
		return ERR_PTR(-ENODEV);
	if (part) {
		put_disk_and_module(disk);
		return ERR_PTR(-ENODEV);
	}

	*inputp = input;
	return disk;
}
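
/*
 * Example of the parsing above: for *inputp == "8:16 max", major/minor are
 * parsed as 8:16, the whole-disk gendisk for that dev_t is returned (a
 * MAJ:MIN that names a partition is rejected with -ENODEV), and *inputp is
 * advanced to "max" for the caller to parse the policy-specific body.
 */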

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result. @ctx->blkg points to the blkg to be updated and @ctx->body the
 * part of @input following MAJ:MIN. This function returns with RCU read
 * lock and queue lock held and must be paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx)
	__acquires(rcu) __acquires(&disk->queue->queue_lock)
{
	struct gendisk *disk;
	struct request_queue *q;
	struct blkcg_gq *blkg;
	int ret;

	disk = blkcg_conf_get_disk(&input);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	q = disk->queue;

	rcu_read_lock();
	spin_lock_irq(&q->queue_lock);

	blkg = blkg_lookup_check(blkcg, pol, q);
	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto fail_unlock;
	}

	if (blkg)
		goto success;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent;
		struct blkcg_gq *new_blkg;

		parent = blkcg_parent(blkcg);
		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		/* Drop locks to do new blkg allocation with GFP_KERNEL. */
		spin_unlock_irq(&q->queue_lock);
		rcu_read_unlock();

		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto fail;
		}

		if (radix_tree_preload(GFP_KERNEL)) {
			blkg_free(new_blkg);
			ret = -ENOMEM;
			goto fail;
		}

		rcu_read_lock();
		spin_lock_irq(&q->queue_lock);

		blkg = blkg_lookup_check(pos, pol, q);
		if (IS_ERR(blkg)) {
			ret = PTR_ERR(blkg);
			blkg_free(new_blkg);
			goto fail_preloaded;
		}

		if (blkg) {
			blkg_free(new_blkg);
		} else {
			blkg = blkg_create(pos, q, new_blkg);
			if (IS_ERR(blkg)) {
				ret = PTR_ERR(blkg);
				goto fail_preloaded;
			}
		}

		radix_tree_preload_end();

		if (pos == blkcg)
			goto success;
	}
success:
	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->body = input;
	return 0;

fail_preloaded:
	radix_tree_preload_end();
fail_unlock:
	spin_unlock_irq(&q->queue_lock);
	rcu_read_unlock();
fail:
	put_disk_and_module(disk);
	/*
	 * If queue was bypassing, we should retry. Do so after a
	 * short msleep(). It isn't strictly necessary but queue
	 * can be bypassing for some time and it's always nice to
	 * avoid busy looping.
	 */
	if (ret == -EBUSY) {
		msleep(10);
		ret = restart_syscall();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update. This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
{
	spin_unlock_irq(&ctx->disk->queue->queue_lock);
	rcu_read_unlock();
	put_disk_and_module(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
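
/*
 * Typical pairing of blkg_conf_prep()/blkg_conf_finish(), sketched as a
 * hypothetical policy's cftype write handler; all "example_*" names are
 * made up for illustration and are not part of this file:
 *
 *	static ssize_t example_set_limit(struct kernfs_open_file *of,
 *					 char *buf, size_t nbytes, loff_t off)
 *	{
 *		struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *		struct blkg_conf_ctx ctx;
 *		u64 limit;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, &example_policy, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		ret = -EINVAL;
 *		if (sscanf(ctx.body, "%llu", &limit) == 1) {
 *			example_apply_limit(ctx.blkg, limit);
 *			ret = 0;
 *		}
 *
 *		blkg_conf_finish(&ctx);
 *		return ret ?: nbytes;
 *	}
 */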

static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] = src->bytes[i];
		dst->ios[i] = src->ios[i];
	}
}

static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] += src->bytes[i];
		dst->ios[i] += src->ios[i];
	}
}

static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
{
	int i;

	for (i = 0; i < BLKG_IOSTAT_NR; i++) {
		dst->bytes[i] -= src->bytes[i];
		dst->ios[i] -= src->ios[i];
	}
}

static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;

	rcu_read_lock();

	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		struct blkcg_gq *parent = blkg->parent;
		struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
		struct blkg_iostat cur, delta;
		unsigned int seq;

		/* fetch the current per-cpu values */
		do {
			seq = u64_stats_fetch_begin(&bisc->sync);
			blkg_iostat_set(&cur, &bisc->cur);
		} while (u64_stats_fetch_retry(&bisc->sync, seq));

		/* propagate percpu delta to global */
		u64_stats_update_begin(&blkg->iostat.sync);
		blkg_iostat_set(&delta, &cur);
		blkg_iostat_sub(&delta, &bisc->last);
		blkg_iostat_add(&blkg->iostat.cur, &delta);
		blkg_iostat_add(&bisc->last, &delta);
		u64_stats_update_end(&blkg->iostat.sync);

		/* propagate global delta to parent */
		if (parent) {
			u64_stats_update_begin(&parent->iostat.sync);
			blkg_iostat_set(&delta, &blkg->iostat.cur);
			blkg_iostat_sub(&delta, &blkg->iostat.last);
			blkg_iostat_add(&parent->iostat.cur, &delta);
			blkg_iostat_add(&blkg->iostat.last, &delta);
			u64_stats_update_end(&parent->iostat.sync);
		}
	}

	rcu_read_unlock();
}
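
/*
 * Numeric example of the delta propagation above: if this CPU's counter for
 * a blkg reads cur = 100 with last = 70, delta = 30 is added to the blkg's
 * global iostat.cur and bisc->last becomes 100; the same cur/last scheme is
 * then applied one level up, so the parent only ever sees the increments
 * that have not been propagated yet.
 */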

/*
 * The rstat algorithms intentionally don't handle the root cgroup to avoid
 * incurring overhead when no cgroups are defined. For that reason,
 * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
 * iostat in the root cgroup's blkcg_gq.
 *
 * However, we would like to re-use the printing code between the root and
 * non-root cgroups to the extent possible. For that reason, we simulate
 * flushing the root cgroup's stats by explicitly filling in the iostat
 * with disk level statistics.
 */
static void blkcg_fill_root_iostats(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct hd_struct *part = disk_get_part(disk, 0);
		struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
		struct blkg_iostat tmp;
		int cpu;

		memset(&tmp, 0, sizeof(tmp));
		for_each_possible_cpu(cpu) {
			struct disk_stats *cpu_dkstats;

			cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
			tmp.ios[BLKG_IOSTAT_READ] +=
				cpu_dkstats->ios[STAT_READ];
			tmp.ios[BLKG_IOSTAT_WRITE] +=
				cpu_dkstats->ios[STAT_WRITE];
			tmp.ios[BLKG_IOSTAT_DISCARD] +=
				cpu_dkstats->ios[STAT_DISCARD];
			// convert sectors to bytes
			tmp.bytes[BLKG_IOSTAT_READ] +=
				cpu_dkstats->sectors[STAT_READ] << 9;
			tmp.bytes[BLKG_IOSTAT_WRITE] +=
				cpu_dkstats->sectors[STAT_WRITE] << 9;
			tmp.bytes[BLKG_IOSTAT_DISCARD] +=
				cpu_dkstats->sectors[STAT_DISCARD] << 9;

			u64_stats_update_begin(&blkg->iostat.sync);
			blkg_iostat_set(&blkg->iostat.cur, &tmp);
			u64_stats_update_end(&blkg->iostat.sync);
		}
		disk_put_part(part);
	}
}

static int blkcg_print_stat(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) if (!seq_css(sf)->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) blkcg_fill_root_iostats();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) cgroup_rstat_flush(blkcg->css.cgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) struct blkg_iostat_set *bis = &blkg->iostat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) const char *dname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) char *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) u64 rbytes, wbytes, rios, wios, dbytes, dios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) size_t size = seq_get_buf(sf, &buf), off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) bool has_stats = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) spin_lock_irq(&blkg->q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) if (!blkg->online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) dname = blkg_dev_name(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) if (!dname)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) * Hooray string manipulation: scnprintf() returns the number of bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * written NOT INCLUDING the trailing \0, so the remaining space shrinks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) * by count+1, but the next write should start at that \0, so we only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) * advance off by count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) off += scnprintf(buf+off, size-off, "%s ", dname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) seq = u64_stats_fetch_begin(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) rios = bis->cur.ios[BLKG_IOSTAT_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) } while (u64_stats_fetch_retry(&bis->sync, seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) if (rbytes || wbytes || rios || wios) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) off += scnprintf(buf+off, size-off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) rbytes, wbytes, rios, wios,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) dbytes, dios);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) off += scnprintf(buf+off, size-off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) " use_delay=%d delay_nsec=%llu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) atomic_read(&blkg->use_delay),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) (unsigned long long)atomic64_read(&blkg->delay_nsec));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) for (i = 0; i < BLKCG_MAX_POLS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) size_t written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (!blkg->pd[i] || !pol->pd_stat_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (written)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) has_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) off += written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (has_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) if (off < size - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) off += scnprintf(buf+off, size-off, "\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) seq_commit(sf, off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) seq_commit(sf, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) spin_unlock_irq(&blkg->q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) }
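
/*
 * For reference, each line produced above shows up in io.stat roughly as
 * (illustrative values, device printed as MAJ:MIN by blkg_dev_name()):
 *
 *	8:16 rbytes=24772608 wbytes=9617408 rios=1066 wios=153 dbytes=0 dios=0
 *
 * optionally followed by " use_delay=... delay_nsec=..." when debug stats
 * are enabled, and by whatever the registered policies append via
 * pd_stat_fn().
 */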
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) static struct cftype blkcg_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) .name = "stat",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) .seq_show = blkcg_print_stat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) static struct cftype blkcg_legacy_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) .name = "reset_stats",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) .write_u64 = blkcg_reset_stats,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * blkcg destruction is a three-stage process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * 1. Destruction starts. The blkcg_css_offline() callback is invoked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * which offlines writeback. Here we tie the next stage of blkg destruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * to the completion of writeback associated with the blkcg. This lets us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * avoid punting potentially large amounts of outstanding writeback to root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * while maintaining any ongoing policies. The next stage is triggered when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * the nr_cgwbs count goes to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) * and handles the destruction of blkgs. Here the css reference held by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * the blkg is put back eventually allowing blkcg_css_free() to be called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * This work may occur in cgwb_release_workfn() on the cgwb_release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * workqueue. Any submitted ios that fail to get the blkg ref will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) * punted to the root_blkg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * This finally frees the blkcg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) */
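
/*
 * Condensed view of the three stages above (each arrow is the event that
 * triggers the next stage):
 *
 *	blkcg_css_offline() --[writeback done, last online pin put]-->
 *	blkcg_destroy_blkgs() --[last css ref put]--> blkcg_css_free()
 */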
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * blkcg_css_offline - cgroup css_offline callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * @css: css of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * This function is called when @css is about to go away. Here the cgwbs are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * offlined first and only once writeback associated with the blkcg has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * finished do we start step 2 (see above).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) static void blkcg_css_offline(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) /* this prevents anyone from attaching or migrating to this blkcg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) wb_blkcg_offline(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /* put the base online pin allowing step 2 to be triggered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) blkcg_unpin_online(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * blkcg_destroy_blkgs - responsible for shooting down blkgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) * @blkcg: blkcg of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) * is nested inside q lock, this function performs reverse double lock dancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) * Destroying the blkgs releases the reference held on the blkcg's css allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * blkcg_css_free to eventually be called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * This is the blkcg counterpart of ioc_release_fn().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) void blkcg_destroy_blkgs(struct blkcg *blkcg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) spin_lock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) while (!hlist_empty(&blkcg->blkg_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) struct blkcg_gq, blkcg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) struct request_queue *q = blkg->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (need_resched() || !spin_trylock(&q->queue_lock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * Given that the system can accumulate a huge number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * of blkgs in pathological cases, check to see if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * need to reschedule to avoid a softlockup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) spin_unlock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) spin_lock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) blkg_destroy(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) spin_unlock(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) spin_unlock_irq(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
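
/*
 * Note on the trylock above: as the function comment says, the blkcg lock
 * normally nests inside the queue lock.  Here blkcg->lock is already held
 * while queue_lock is still needed, i.e. the reverse order, so a plain
 * spin_lock() could deadlock against the normal nesting; spin_trylock()
 * combined with the unlock-and-retry path above avoids that inversion.
 */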
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) static void blkcg_css_free(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) list_del(&blkcg->all_blkcgs_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) for (i = 0; i < BLKCG_MAX_POLS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) if (blkcg->cpd[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) kfree(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) static struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) struct cgroup_subsys_state *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (!parent_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) blkcg = &blkcg_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) if (!blkcg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) ret = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) for (i = 0; i < BLKCG_MAX_POLS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) struct blkcg_policy_data *cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * If the policy hasn't been registered yet, skip it; its cpd will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * be allocated at registration time (see blkcg_policy_register()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * Otherwise, check if the policy requires any specific per-cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * data: if it does, allocate and initialize it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (!pol || !pol->cpd_alloc_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) cpd = pol->cpd_alloc_fn(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (!cpd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) ret = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) goto free_pd_blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) blkcg->cpd[i] = cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) cpd->blkcg = blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) cpd->plid = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) if (pol->cpd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) pol->cpd_init_fn(cpd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) spin_lock_init(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) refcount_set(&blkcg->online_pin, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) INIT_HLIST_HEAD(&blkcg->blkg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) #ifdef CONFIG_CGROUP_WRITEBACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) INIT_LIST_HEAD(&blkcg->cgwb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) return &blkcg->css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) free_pd_blkcg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) for (i--; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) if (blkcg->cpd[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (blkcg != &blkcg_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) kfree(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) static int blkcg_css_online(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) struct blkcg *blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct blkcg *parent = blkcg_parent(blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) * blkcg_pin_online() is used to delay blkcg offline so that blkgs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * don't go offline while cgwbs are still active on them. Pin the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * parent so that offline always happens towards the root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) blkcg_pin_online(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * blkcg_init_queue - initialize blkcg part of request queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * @q: request_queue to initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * Called from blk_alloc_queue(). Responsible for initializing blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * part of new request_queue @q.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * RETURNS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * 0 on success, -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) int blkcg_init_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) struct blkcg_gq *new_blkg, *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) bool preloaded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (!new_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) preloaded = !radix_tree_preload(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) /* Make sure the root blkg exists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) blkg = blkg_create(&blkcg_root, q, new_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) if (IS_ERR(blkg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) q->root_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) if (preloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) ret = blk_ioprio_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) ret = blk_throtl_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) ret = blk_iolatency_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) blk_throtl_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) goto err_destroy_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) err_destroy_all:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) blkg_destroy_all(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) err_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) if (preloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) radix_tree_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return PTR_ERR(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * blkcg_exit_queue - exit and release blkcg part of request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) * @q: request_queue being released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) * Called from blk_exit_queue(). Responsible for exiting blkcg part.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) void blkcg_exit_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) blkg_destroy_all(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) blk_throtl_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * We cannot support shared io contexts, as we have no means to support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) * two tasks with the same ioc in two different groups without major rework
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * of the main cic data structures. For now we allow a task to change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * its cgroup only if it's the only owner of its ioc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) static int blkcg_can_attach(struct cgroup_taskset *tset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) struct cgroup_subsys_state *dst_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) struct io_context *ioc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /* task_lock() is needed to avoid races with exit_io_context() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) cgroup_taskset_for_each(task, dst_css, tset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) task_lock(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) ioc = task->io_context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) if (ioc && atomic_read(&ioc->nr_tasks) > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) task_unlock(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) static void blkcg_bind(struct cgroup_subsys_state *root_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) for (i = 0; i < BLKCG_MAX_POLS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) struct blkcg_policy *pol = blkcg_policy[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) if (!pol || !pol->cpd_bind_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (blkcg->cpd[pol->plid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) static void blkcg_exit(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) if (tsk->throttle_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) blk_put_queue(tsk->throttle_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) tsk->throttle_queue = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) struct cgroup_subsys io_cgrp_subsys = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) .css_alloc = blkcg_css_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) .css_online = blkcg_css_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) .css_offline = blkcg_css_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) .css_free = blkcg_css_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) .can_attach = blkcg_can_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) .css_rstat_flush = blkcg_rstat_flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) .bind = blkcg_bind,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) .dfl_cftypes = blkcg_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) .legacy_cftypes = blkcg_legacy_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) .legacy_name = "blkio",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) .exit = blkcg_exit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) #ifdef CONFIG_MEMCG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) * This ensures that, if available, memcg is automatically enabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) * together on the default hierarchy so that the owner cgroup can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) * be retrieved from writeback pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) .depends_on = 1 << memory_cgrp_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) EXPORT_SYMBOL_GPL(io_cgrp_subsys);
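
/*
 * Note: on the default (v2) hierarchy this controller is exposed as "io"
 * in cgroup.controllers, while .legacy_name above makes it show up as
 * "blkio" when used on a v1 hierarchy.
 */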
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * blkcg_activate_policy - activate a blkcg policy on a request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * @q: request_queue of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * @pol: blkcg policy to activate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * Activate @pol on @q. Requires %GFP_KERNEL context. @q is frozen (for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * blk-mq devices) while its blkgs are populated with policy_data for @pol.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * Activation thus happens with nobody accessing the blkgs from the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) * path. Update of each blkg is protected by both the queue and blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * locks so that holding either lock and testing blkcg_policy_enabled()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) * is always enough for dereferencing policy data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) * The caller is responsible for synchronizing [de]activations and policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) * [un]registerations. Returns 0 on success, -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) int blkcg_activate_policy(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) const struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) struct blkg_policy_data *pd_prealloc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) struct blkcg_gq *blkg, *pinned_blkg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (blkcg_policy_enabled(q, pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) /* blkgs are added at blkg_list's head, walk in reverse to allocate parents first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) struct blkg_policy_data *pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) if (blkg->pd[pol->plid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (blkg == pinned_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) pd = pd_prealloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) pd_prealloc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) blkg->blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) if (!pd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) * GFP_NOWAIT failed. Free the existing one and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * prealloc for @blkg w/ GFP_KERNEL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) if (pinned_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) blkg_put(pinned_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) blkg_get(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) pinned_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) pol->pd_free_fn(pd_prealloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) blkg->blkcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) goto enomem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) blkg->pd[pol->plid] = pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) pd->blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) pd->plid = pol->plid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) /* all allocated, init in the same order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) if (pol->pd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) pol->pd_init_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) __set_bit(pol->plid, q->blkcg_pols);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (pinned_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) blkg_put(pinned_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (pd_prealloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) pol->pd_free_fn(pd_prealloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) enomem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) /* alloc failed, nothing's initialized yet, free everything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) list_for_each_entry(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) struct blkcg *blkcg = blkg->blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) spin_lock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) if (blkg->pd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) pol->pd_free_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) blkg->pd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) spin_unlock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) EXPORT_SYMBOL_GPL(blkcg_activate_policy);
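
/*
 * Typical use, as an illustrative sketch only (the "mypol" names are made
 * up): a policy enables itself on a queue from its per-queue init hook and
 * tears down from the corresponding exit hook, relying on the rules
 * documented above:
 *
 *	int mypol_init_queue(struct request_queue *q)
 *	{
 *		return blkcg_activate_policy(q, &blkcg_policy_mypol);
 *	}
 *
 *	void mypol_exit_queue(struct request_queue *q)
 *	{
 *		blkcg_deactivate_policy(q, &blkcg_policy_mypol);
 *	}
 */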
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) * @q: request_queue of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * @pol: blkcg policy to deactivate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * Deactivate @pol on @q. Follows the same synchronization rules as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * blkcg_activate_policy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) void blkcg_deactivate_policy(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) const struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) if (!blkcg_policy_enabled(q, pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) spin_lock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) __clear_bit(pol->plid, q->blkcg_pols);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) list_for_each_entry(blkg, &q->blkg_list, q_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) struct blkcg *blkcg = blkg->blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) spin_lock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) if (blkg->pd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) if (pol->pd_offline_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) pol->pd_offline_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) pol->pd_free_fn(blkg->pd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) blkg->pd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) spin_unlock(&blkcg->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) spin_unlock_irq(&q->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) if (queue_is_mq(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * blkcg_policy_register - register a blkcg policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * @pol: blkcg policy to register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * Register @pol with blkcg core. Might sleep and @pol may be modified on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * successful registration. Returns 0 on success and -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int blkcg_policy_register(struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) mutex_lock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) /* find an empty slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) for (i = 0; i < BLKCG_MAX_POLS; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) if (!blkcg_policy[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (i >= BLKCG_MAX_POLS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn come in pairs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) (!pol->pd_alloc_fn ^ !pol->pd_free_fn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) goto err_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /* register @pol */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) pol->plid = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) blkcg_policy[pol->plid] = pol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /* allocate and install cpd's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (pol->cpd_alloc_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) struct blkcg_policy_data *cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) cpd = pol->cpd_alloc_fn(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (!cpd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) goto err_free_cpds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) blkcg->cpd[pol->plid] = cpd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) cpd->blkcg = blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) cpd->plid = pol->plid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) if (pol->cpd_init_fn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) pol->cpd_init_fn(cpd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) /* everything is in place, add intf files for the new policy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) if (pol->dfl_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) pol->dfl_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (pol->legacy_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) pol->legacy_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) err_free_cpds:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) if (pol->cpd_free_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if (blkcg->cpd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) pol->cpd_free_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) blkcg->cpd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) blkcg_policy[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) err_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) EXPORT_SYMBOL_GPL(blkcg_policy_register);
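
/*
 * Illustrative sketch (hypothetical "mypol" names) of how a policy is
 * normally registered from module init and removed again on exit; note
 * that pd_alloc_fn/pd_free_fn must come in pairs as checked above:
 *
 *	static struct blkcg_policy blkcg_policy_mypol = {
 *		.dfl_cftypes	= mypol_files,
 *		.pd_alloc_fn	= mypol_pd_alloc,
 *		.pd_init_fn	= mypol_pd_init,
 *		.pd_free_fn	= mypol_pd_free,
 *	};
 *
 *	static int __init mypol_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_mypol);
 *	}
 *
 *	static void __exit mypol_exit(void)
 *	{
 *		blkcg_policy_unregister(&blkcg_policy_mypol);
 *	}
 */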
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * blkcg_policy_unregister - unregister a blkcg policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * @pol: blkcg policy to unregister
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * Undo blkcg_policy_register(@pol). Might sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) void blkcg_policy_unregister(struct blkcg_policy *pol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) mutex_lock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (WARN_ON(blkcg_policy[pol->plid] != pol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) /* kill the intf files first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) if (pol->dfl_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) cgroup_rm_cftypes(pol->dfl_cftypes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (pol->legacy_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) cgroup_rm_cftypes(pol->legacy_cftypes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) /* remove cpds and unregister */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) mutex_lock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (pol->cpd_free_fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) if (blkcg->cpd[pol->plid]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) pol->cpd_free_fn(blkcg->cpd[pol->plid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) blkcg->cpd[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) blkcg_policy[pol->plid] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) mutex_unlock(&blkcg_pol_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) mutex_unlock(&blkcg_pol_register_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) bool __blkcg_punt_bio_submit(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) struct blkcg_gq *blkg = bio->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) /* consume the flag first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) bio->bi_opf &= ~REQ_CGROUP_PUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) /* never bounce for the root cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) if (!blkg->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) spin_lock_bh(&blkg->async_bio_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) bio_list_add(&blkg->async_bios, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) spin_unlock_bh(&blkg->async_bio_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) }
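
/*
 * Callers normally reach this through the blkcg_punt_bio_submit() inline
 * wrapper, which only takes this path when REQ_CGROUP_PUNT is set on the
 * bio, conceptually:
 *
 *	if (bio->bi_opf & REQ_CGROUP_PUNT)
 *		return __blkcg_punt_bio_submit(bio);
 *	return false;
 *
 * Punted bios are reissued later from the blkg's async_bio_work running
 * on blkcg_punt_bio_wq.
 */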
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) * Scale the accumulated delay based on how long it has been since we last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) * updated the delay. We call this when adding delay (in case it has been a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) * while since delay was last added) and when checking whether a task needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) * to be delayed, to account for any delay that has already elapsed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) u64 old = atomic64_read(&blkg->delay_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) /* negative use_delay means no scaling, see blkcg_set_delay() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (atomic_read(&blkg->use_delay) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) * We only want to scale down once per second. The idea here is that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) * want to delay tasks by min(delay_nsec, NSEC_PER_SEC) within a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * time window: only recently accumulated delay counts, in 1 second
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * windows, since that's the maximum a task can be throttled for. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * save the current delay window in blkg->last_delay so we know how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * much is still left to be charged to the blkg from this point onward.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * blkg->last_use tracks the use_delay counter. If we're unthrottling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * the blkg, we are ok with whatever is happening now, and we can take
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * away more of the accumulated delay since we've already throttled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) * enough that everybody is happy with their IO latencies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) if (time_before64(old + NSEC_PER_SEC, now) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) u64 cur = atomic64_read(&blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) u64 sub = min_t(u64, blkg->last_delay, now - old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) int cur_use = atomic_read(&blkg->use_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) * We've been unthrottled, subtract a larger chunk of our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) * accumulated delay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) if (cur_use < blkg->last_use)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) sub = max_t(u64, sub, blkg->last_delay >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * This shouldn't happen, but handle it anyway. Our delay_nsec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * should only ever be growing except here where we subtract out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * min(last_delay, 1 second), but lord knows bugs happen and I'd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * rather not end up with negative numbers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (unlikely(cur < sub)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) atomic64_set(&blkg->delay_nsec, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) blkg->last_delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) atomic64_sub(sub, &blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) blkg->last_delay = cur - sub;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) blkg->last_use = cur_use;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
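
/*
 * Worked example for blkcg_scale_delay() (numbers are illustrative only):
 * suppose the window was last reset 1.5s ago, delay_nsec holds 800ms worth
 * of delay and last_delay was 600ms. Then sub = min(600ms, 1.5s) = 600ms;
 * if use_delay also dropped since the last update, sub is additionally
 * raised to at least last_delay / 2, which is still 600ms here. delay_nsec
 * becomes 200ms and last_delay is set to 200ms, so only recently added
 * delay keeps throttling the blkg in the next window.
 */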
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) * This is called when we want to walk up the hierarchy and check whether we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) * need to throttle, and then actually throttle if there is some accumulated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) * delay. This should only be called upon return to user space so we're not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) * holding some lock that would induce a priority inversion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) unsigned long pflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) bool clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) u64 now = ktime_to_ns(ktime_get());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) u64 exp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) u64 delay_nsec = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) int tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) while (blkg->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) int use_delay = atomic_read(&blkg->use_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) if (use_delay) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) u64 this_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) blkcg_scale_delay(blkg, now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) this_delay = atomic64_read(&blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) if (this_delay > delay_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) delay_nsec = this_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) clamp = use_delay > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) blkg = blkg->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) if (!delay_nsec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) * Let's not sleep for all eternity if we've amassed a huge delay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) * Swapping or metadata IO can accumulate tens of seconds worth of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) * delay, and we want userspace to be able to do _something_, so cap the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) * delays at 0.25s. If there are tens of seconds worth of delay then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) * tasks will be delayed for 0.25 seconds on every syscall. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) * blkcg_set_delay() was used, as indicated by a negative use_delay, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) * caller is responsible for regulating the range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) if (clamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) psi_memstall_enter(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) exp = ktime_add_ns(now, delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) tok = io_schedule_prepare();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) __set_current_state(TASK_KILLABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) } while (!fatal_signal_pending(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) io_schedule_finish(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) psi_memstall_leave(&pflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) * blkcg_maybe_throttle_current - throttle the current task if it has been marked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) * This is only called if we've been marked with set_notify_resume(). Obviously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) * we can be marked with set_notify_resume() for reasons other than blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) * throttling, so we check whether current->throttle_queue is set; if it is not,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) * this does nothing. This should only ever be called by the resume code; it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) * not meant to be called willy-nilly, as it will actually do the work to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * throttle the task if it is set up for throttling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) void blkcg_maybe_throttle_current(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) struct request_queue *q = current->throttle_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) struct blkcg *blkcg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) struct blkcg_gq *blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) bool use_memdelay = current->use_memdelay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) current->throttle_queue = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) current->use_memdelay = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) css = kthread_blkcg();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) if (css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) blkcg = css_to_blkcg(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (!blkcg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) blkg = blkg_lookup(blkcg, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) if (!blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (!blkg_tryget(blkg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) blkcg_maybe_throttle_blkg(blkg, use_memdelay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) blkg_put(blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) blk_put_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) blk_put_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) }
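
/*
 * Illustrative sketch (not part of this file): blkcg_maybe_throttle_current()
 * is reached from the architecture's return-to-user work handling, roughly as
 * below. example_exit_to_user() is hypothetical; tracehook_notify_resume()
 * (see linux/tracehook.h, included above) is what ends up calling into here.
 */
#if 0
static void example_exit_to_user(struct pt_regs *regs)
{
        if (test_thread_flag(TIF_NOTIFY_RESUME))
                /* runs pending task work, then blkcg_maybe_throttle_current() */
                tracehook_notify_resume(regs);
}
#endif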
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) * blkcg_schedule_throttle - this task needs to check for throttling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) * @q: the request queue IO was submitted on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * @use_memdelay: do we charge this to memory delay for PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * This is called by the IO controller when we know there's delay accumulated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * for the blkg for this task. We do not pass the blkg because there are places
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * we call this that may not have that information; the swapping code, for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) * instance, will only have a request_queue at that point. This sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * notify_resume for the task so it checks whether it requires throttling before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) * returning to user space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * We will only schedule once per syscall. You can call this over and over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * again and it will only do the check once upon return to user space, and only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) * throttle once. If the task needs to be throttled again it will need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) * re-armed the next time we see the task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) if (unlikely(current->flags & PF_KTHREAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) if (!blk_get_queue(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) if (current->throttle_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) blk_put_queue(current->throttle_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) current->throttle_queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) if (use_memdelay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) current->use_memdelay = use_memdelay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) set_notify_resume(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) * blkcg_add_delay - add delay to this blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) * @blkg: blkg of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) * @now: the current time in nanoseconds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) * @delta: how many nanoseconds of delay to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) * Charge @delta to the blkg's current delay accumulation. This is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) * throttle tasks if an IO controller thinks we need more throttling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) blkcg_scale_delay(blkg, now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) atomic64_add(delta, &blkg->delay_nsec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) }
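
/*
 * Illustrative sketch (not part of this file): how a policy that decides a
 * group owes delay is expected to combine blkcg_add_delay() with
 * blkcg_schedule_throttle(). example_policy_penalize() and the owed_ns
 * parameter are hypothetical; the two blkcg calls are the API defined above.
 */
#if 0
static void example_policy_penalize(struct request_queue *q,
                                    struct blkcg_gq *blkg, u64 owed_ns)
{
        /* Charge the debt to the offending group ... */
        blkcg_add_delay(blkg, ktime_to_ns(ktime_get()), owed_ns);

        /*
         * ... and arm the current task so it pays the debt back on its way
         * out to user space. Pass true for use_memdelay when the stall
         * should also be accounted as memory pressure for PSI.
         */
        blkcg_schedule_throttle(q, false);
}
#endif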
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) * blkg_tryget_closest - try to get a blkg ref on the closest blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * @css: target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * As the failure mode here is to walk up the blkg tree, this ensures that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * blkg->parent pointers are always valid. This returns the blkg that it ended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) * up taking a reference on, or %NULL if no reference was taken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) struct blkcg_gq *blkg, *ret_blkg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) while (blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) if (blkg_tryget(blkg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) ret_blkg = blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) blkg = blkg->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) return ret_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * bio_associate_blkg_from_css - associate a bio with a specified css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * @css: target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * Associate @bio with the blkg found by combining the css's blkg and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * request_queue of the @bio. An association failure is handled by walking up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * the blkg tree. Therefore, the blkg associated can be anything between the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) * blkg of @css and q->root_blkg. This situation only happens when a cgroup is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) * dying, and then the remaining bios will spill to the closest alive blkg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) * A reference will be taken on the blkg and will be released when @bio is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) * freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) void bio_associate_blkg_from_css(struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) if (bio->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) blkg_put(bio->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) if (css && css->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) bio->bi_blkg = blkg_tryget_closest(bio, css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) blkg_get(bio->bi_disk->queue->root_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) bio->bi_blkg = bio->bi_disk->queue->root_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
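
/*
 * Illustrative sketch (not part of this file): charging a bio to an
 * explicitly chosen cgroup rather than the submitter's.
 * example_attribute_bio() is hypothetical, and @blkcg_css is assumed to be
 * a blkcg css the caller already holds a reference on.
 */
#if 0
static void example_attribute_bio(struct bio *bio,
                                  struct cgroup_subsys_state *blkcg_css)
{
        /*
         * Any previous association is dropped; a reference is taken on the
         * chosen blkg (or its closest alive ancestor) and released when the
         * bio is freed.
         */
        bio_associate_blkg_from_css(bio, blkcg_css);
}
#endif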
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) * bio_associate_blkg - associate a bio with a blkg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * @bio: target bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * Associate @bio with the blkg found from the bio's css and request_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * If one is not found, blkg_lookup_create() creates the blkg. If a blkg is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) * already associated, the css is reused and association redone as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) * request_queue may have changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) void bio_associate_blkg(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) if (bio->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) css = &bio_blkcg(bio)->css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) bio_associate_blkg_from_css(bio, css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) EXPORT_SYMBOL_GPL(bio_associate_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) * bio_clone_blkg_association - clone blkg association from src to dst bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) * @dst: destination bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) * @src: source bio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) void bio_clone_blkg_association(struct bio *dst, struct bio *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (src->bi_blkg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) if (dst->bi_blkg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) blkg_put(dst->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) blkg_get(src->bi_blkg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) dst->bi_blkg = src->bi_blkg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static int blk_cgroup_io_type(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (op_is_discard(bio->bi_opf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) return BLKG_IOSTAT_DISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) if (op_is_write(bio->bi_opf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) return BLKG_IOSTAT_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) return BLKG_IOSTAT_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) void blk_cgroup_bio_start(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) int rwd = blk_cgroup_io_type(bio), cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) struct blkg_iostat_set *bis;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) cpu = get_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) u64_stats_update_begin(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * bio and we would have already accounted for the size of the bio.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) bio_set_flag(bio, BIO_CGROUP_ACCT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) bis->cur.ios[rwd]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) u64_stats_update_end(&bis->sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) if (cgroup_subsys_on_dfl(io_cgrp_subsys))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
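
/*
 * Worked example for blk_cgroup_bio_start() (sizes illustrative only): a
 * 1 MiB WRITE gets cur.bytes[BLKG_IOSTAT_WRITE] += 1 MiB and
 * cur.ios[BLKG_IOSTAT_WRITE]++ on its first pass through here, and
 * BIO_CGROUP_ACCT is set. A bio arriving with the flag already set (a split
 * bio, per the comment above) only bumps cur.ios, so the bytes are never
 * double counted.
 */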
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) static int __init blkcg_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) WQ_MEM_RECLAIM | WQ_FREEZABLE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) WQ_UNBOUND | WQ_SYSFS, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) if (!blkcg_punt_bio_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) subsys_initcall(blkcg_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) module_param(blkcg_debug_stats, bool, 0644);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");