// SPDX-License-Identifier: GPL-2.0
/*
 * Block multiqueue core code
 *
 * Copyright (C) 2013-2014 Jens Axboe
 * Copyright (C) 2013-2014 Christoph Hellwig
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/llist.h>
#include <linux/list_sort.h>
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/topology.h>
#include <linux/sched/signal.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
#include <linux/blk-crypto.h>

#include <trace/events/block.h>

#include <linux/blk-mq.h>
#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

#include <trace/hooks/block.h>

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);

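/*
 * Map a completed request to a poll stats bucket: even buckets hold reads,
 * odd buckets hold writes, and the bucket index grows with ilog2() of the
 * request size in sectors, capped at BLK_MQ_POLL_STATS_BKTS.
 */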
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
        int ddir, sectors, bucket;

        ddir = rq_data_dir(rq);
        sectors = blk_rq_stats_sectors(rq);

        bucket = ddir + 2 * ilog2(sectors);

        if (bucket < 0)
                return -1;
        else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
                return ddir + BLK_MQ_POLL_STATS_BKTS - 2;

        return bucket;
}

/*
 * Check if any of the ctx, dispatch list or elevator
 * have pending work in this hardware queue.
 */
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
        return !list_empty_careful(&hctx->dispatch) ||
                sbitmap_any_bit_set(&hctx->ctx_map) ||
                        blk_mq_sched_has_work(hctx);
}

/*
 * Mark this ctx as having pending work in this hardware queue
 */
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
                                     struct blk_mq_ctx *ctx)
{
        const int bit = ctx->index_hw[hctx->type];

        if (!sbitmap_test_bit(&hctx->ctx_map, bit))
                sbitmap_set_bit(&hctx->ctx_map, bit);
}

static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
                                      struct blk_mq_ctx *ctx)
{
        const int bit = ctx->index_hw[hctx->type];

        sbitmap_clear_bit(&hctx->ctx_map, bit);
}

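/*
 * Per-partition in-flight accounting, filled in by iterating over all busy
 * tags with blk_mq_queue_tag_busy_iter() below.
 */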
struct mq_inflight {
        struct hd_struct *part;
        unsigned int inflight[2];
};

static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
                                  struct request *rq, void *priv,
                                  bool reserved)
{
        struct mq_inflight *mi = priv;

        if ((!mi->part->partno || rq->part == mi->part) &&
            blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
                mi->inflight[rq_data_dir(rq)]++;

        return true;
}

unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
        struct mq_inflight mi = { .part = part };

        blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);

        return mi.inflight[0] + mi.inflight[1];
}

void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
                         unsigned int inflight[2])
{
        struct mq_inflight mi = { .part = part };

        blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
        inflight[0] = mi.inflight[0];
        inflight[1] = mi.inflight[1];
}

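/*
 * Queue freezing: the first caller kills q_usage_counter so no new requests
 * can enter the queue; nested callers only bump mq_freeze_depth.
 * blk_mq_unfreeze_queue() resurrects the counter once the depth drops back
 * to zero.
 */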
void blk_freeze_queue_start(struct request_queue *q)
{
        mutex_lock(&q->mq_freeze_lock);
        if (++q->mq_freeze_depth == 1) {
                percpu_ref_kill(&q->q_usage_counter);
                mutex_unlock(&q->mq_freeze_lock);
                if (queue_is_mq(q))
                        blk_mq_run_hw_queues(q, false);
        } else {
                mutex_unlock(&q->mq_freeze_lock);
        }
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);

void blk_mq_freeze_queue_wait(struct request_queue *q)
{
        wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);

int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
                                     unsigned long timeout)
{
        return wait_event_timeout(q->mq_freeze_wq,
                                        percpu_ref_is_zero(&q->q_usage_counter),
                                        timeout);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);

/*
 * Guarantee no request is in use, so we can change any data structure of
 * the queue afterward.
 */
void blk_freeze_queue(struct request_queue *q)
{
        /*
         * In the !blk_mq case we are only calling this to kill the
         * q_usage_counter, otherwise this increases the freeze depth
         * and waits for it to return to zero. For this reason there is
         * no blk_unfreeze_queue(), and blk_freeze_queue() is not
         * exported to drivers as the only user for unfreeze is blk_mq.
         */
        blk_freeze_queue_start(q);
        blk_mq_freeze_queue_wait(q);
}

void blk_mq_freeze_queue(struct request_queue *q)
{
        /*
         * ...just an alias to keep freeze and unfreeze actions balanced
         * in the blk_mq_* namespace
         */
        blk_freeze_queue(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

void blk_mq_unfreeze_queue(struct request_queue *q)
{
        mutex_lock(&q->mq_freeze_lock);
        q->mq_freeze_depth--;
        WARN_ON_ONCE(q->mq_freeze_depth < 0);
        if (!q->mq_freeze_depth) {
                percpu_ref_resurrect(&q->q_usage_counter);
                wake_up_all(&q->mq_freeze_wq);
        }
        mutex_unlock(&q->mq_freeze_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

/*
 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
 * mpt3sas driver such that this function can be removed.
 */
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
        blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * @q: request queue.
 *
 * Note: this function does not prevent the struct request end_io() callback
 * from being invoked. Once this function returns, no dispatch can happen
 * until the queue is unquiesced via blk_mq_unquiesce_queue().
 */
void blk_mq_quiesce_queue(struct request_queue *q)
{
        struct blk_mq_hw_ctx *hctx;
        unsigned int i;
        bool rcu = false;

        blk_mq_quiesce_queue_nowait(q);

        queue_for_each_hw_ctx(q, hctx, i) {
                if (hctx->flags & BLK_MQ_F_BLOCKING)
                        synchronize_srcu(hctx->srcu);
                else
                        rcu = true;
        }
        if (rcu)
                synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

/*
 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
 * @q: request queue.
 *
 * This function restores the queue to the state it was in before
 * blk_mq_quiesce_queue() was called.
 */
void blk_mq_unquiesce_queue(struct request_queue *q)
{
        blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);

        /* dispatch requests which are inserted during quiescing */
        blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);

void blk_mq_wake_waiters(struct request_queue *q)
{
        struct blk_mq_hw_ctx *hctx;
        unsigned int i;

        queue_for_each_hw_ctx(q, hctx, i)
                if (blk_mq_hw_queue_mapped(hctx))
                        blk_mq_tag_wakeup_all(hctx->tags, true);
}

/*
 * Only need start/end time stamping if we have iostat or
 * blk stats enabled, or using an IO scheduler.
 */
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
        return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
}

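/*
 * Initialize a request from the statically allocated pool entry for @tag.
 * With an I/O scheduler the driver tag is assigned later, so only the
 * internal (scheduler) tag is set here; without one the tag is the driver
 * tag itself.
 */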
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
                unsigned int tag, u64 alloc_time_ns)
{
        struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
        struct request *rq = tags->static_rqs[tag];

        if (data->q->elevator) {
                rq->tag = BLK_MQ_NO_TAG;
                rq->internal_tag = tag;
        } else {
                rq->tag = tag;
                rq->internal_tag = BLK_MQ_NO_TAG;
        }

        /* csd/requeue_work/fifo_time is initialized before use */
        rq->q = data->q;
        rq->mq_ctx = data->ctx;
        rq->mq_hctx = data->hctx;
        rq->rq_flags = 0;
        rq->cmd_flags = data->cmd_flags;
        if (data->flags & BLK_MQ_REQ_PM)
                rq->rq_flags |= RQF_PM;
        if (blk_queue_io_stat(data->q))
                rq->rq_flags |= RQF_IO_STAT;
        INIT_LIST_HEAD(&rq->queuelist);
        INIT_HLIST_NODE(&rq->hash);
        RB_CLEAR_NODE(&rq->rb_node);
        rq->rq_disk = NULL;
        rq->part = NULL;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
        rq->alloc_time_ns = alloc_time_ns;
#endif
        if (blk_mq_need_time_stamp(rq))
                rq->start_time_ns = ktime_get_ns();
        else
                rq->start_time_ns = 0;
        rq->io_start_time_ns = 0;
        rq->stats_sectors = 0;
        rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
        rq->nr_integrity_segments = 0;
#endif
        blk_crypto_rq_set_defaults(rq);
        /* tag was already set */
        WRITE_ONCE(rq->deadline, 0);

        rq->timeout = 0;

        rq->end_io = NULL;
        rq->end_io_data = NULL;

        data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
        refcount_set(&rq->ref, 1);

        if (!op_is_flush(data->cmd_flags)) {
                struct elevator_queue *e = data->q->elevator;

                rq->elv.icq = NULL;
                if (e && e->type->ops.prepare_request) {
                        if (e->type->icq_cache)
                                blk_mq_sched_assign_ioc(rq);

                        e->type->ops.prepare_request(rq);
                        rq->rq_flags |= RQF_ELVPRIV;
                }
        }

        data->hctx->queued++;
        trace_android_vh_blk_rq_ctx_init(rq, tags, data, alloc_time_ns);
        return rq;
}

static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
{
        struct request_queue *q = data->q;
        struct elevator_queue *e = q->elevator;
        u64 alloc_time_ns = 0;
        unsigned int tag;

        /* alloc_time includes depth and tag waits */
        if (blk_queue_rq_alloc_time(q))
                alloc_time_ns = ktime_get_ns();

        if (data->cmd_flags & REQ_NOWAIT)
                data->flags |= BLK_MQ_REQ_NOWAIT;

        if (e) {
                /*
                 * Flush requests are special and go directly to the
                 * dispatch list. Don't include reserved tags in the
                 * limiting, as it isn't useful.
                 */
                if (!op_is_flush(data->cmd_flags) &&
                    e->type->ops.limit_depth &&
                    !(data->flags & BLK_MQ_REQ_RESERVED))
                        e->type->ops.limit_depth(data->cmd_flags, data);
        }

retry:
        data->ctx = blk_mq_get_ctx(q);
        data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
        if (!e)
                blk_mq_tag_busy(data->hctx);

        /*
         * Waiting allocations only fail because of an inactive hctx. In that
         * case just retry the hctx assignment and tag allocation as CPU hotplug
         * should have migrated us to an online CPU by now.
         */
        tag = blk_mq_get_tag(data);
        if (tag == BLK_MQ_NO_TAG) {
                if (data->flags & BLK_MQ_REQ_NOWAIT)
                        return NULL;

                /*
                 * Give up the CPU and sleep for a short time to ensure that
                 * threads using a realtime scheduling class are migrated off
                 * the CPU, and thus off the hctx that is going away.
                 */
                msleep(3);
                goto retry;
        }
        return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
}

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
                blk_mq_req_flags_t flags)
{
        struct blk_mq_alloc_data data = {
                .q              = q,
                .flags          = flags,
                .cmd_flags      = op,
        };
        struct request *rq;
        int ret;

        ret = blk_queue_enter(q, flags);
        if (ret)
                return ERR_PTR(ret);

        rq = __blk_mq_alloc_request(&data);
        if (!rq)
                goto out_queue_exit;
        rq->__data_len = 0;
        rq->__sector = (sector_t) -1;
        rq->bio = rq->biotail = NULL;
        return rq;
out_queue_exit:
        blk_queue_exit(q);
        return ERR_PTR(-EWOULDBLOCK);
}
EXPORT_SYMBOL(blk_mq_alloc_request);

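/*
 * Allocate a request on a specific hardware queue. Callers must pass
 * BLK_MQ_REQ_NOWAIT or BLK_MQ_REQ_RESERVED, since a sleeping tag allocation
 * could end up on a different hctx than the one requested.
 */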
struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
        unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
        struct blk_mq_alloc_data data = {
                .q              = q,
                .flags          = flags,
                .cmd_flags      = op,
        };
        u64 alloc_time_ns = 0;
        unsigned int cpu;
        unsigned int tag;
        int ret;

        /* alloc_time includes depth and tag waits */
        if (blk_queue_rq_alloc_time(q))
                alloc_time_ns = ktime_get_ns();

        /*
         * If the tag allocator sleeps we could get an allocation for a
         * different hardware context. No need to complicate the low level
         * allocator for this for the rare use case of a command tied to
         * a specific queue.
         */
        if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
                return ERR_PTR(-EINVAL);

        if (hctx_idx >= q->nr_hw_queues)
                return ERR_PTR(-EIO);

        ret = blk_queue_enter(q, flags);
        if (ret)
                return ERR_PTR(ret);

        /*
         * Check if the hardware context is actually mapped to anything.
         * If not tell the caller that it should skip this queue.
         */
        ret = -EXDEV;
        data.hctx = q->queue_hw_ctx[hctx_idx];
        if (!blk_mq_hw_queue_mapped(data.hctx))
                goto out_queue_exit;
        cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
        data.ctx = __blk_mq_get_ctx(q, cpu);

        if (!q->elevator)
                blk_mq_tag_busy(data.hctx);

        ret = -EWOULDBLOCK;
        tag = blk_mq_get_tag(&data);
        if (tag == BLK_MQ_NO_TAG)
                goto out_queue_exit;
        return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);

out_queue_exit:
        blk_queue_exit(q);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

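/*
 * Final request teardown: return the driver and scheduler tags, allow the
 * scheduler to restart the hw queue, and drop the queue usage reference
 * taken at allocation time.
 */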
static void __blk_mq_free_request(struct request *rq)
{
        struct request_queue *q = rq->q;
        struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
        const int sched_tag = rq->internal_tag;

        blk_crypto_free_request(rq);
        blk_pm_mark_last_busy(rq);
        rq->mq_hctx = NULL;
        if (rq->tag != BLK_MQ_NO_TAG)
                blk_mq_put_tag(hctx->tags, ctx, rq->tag);
        if (sched_tag != BLK_MQ_NO_TAG)
                blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
        blk_mq_sched_restart(hctx);
        blk_queue_exit(q);
}

void blk_mq_free_request(struct request *rq)
{
        struct request_queue *q = rq->q;
        struct elevator_queue *e = q->elevator;
        struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

        if (rq->rq_flags & RQF_ELVPRIV) {
                if (e && e->type->ops.finish_request)
                        e->type->ops.finish_request(rq);
                if (rq->elv.icq) {
                        put_io_context(rq->elv.icq->ioc);
                        rq->elv.icq = NULL;
                }
        }

        ctx->rq_completed[rq_is_sync(rq)]++;
        if (rq->rq_flags & RQF_MQ_INFLIGHT)
                __blk_mq_dec_active_requests(hctx);

        if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                laptop_io_completion(q->backing_dev_info);

        rq_qos_done(q, rq);

        WRITE_ONCE(rq->state, MQ_RQ_IDLE);
        if (refcount_dec_and_test(&rq->ref))
                __blk_mq_free_request(rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);

inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
        u64 now = 0;

        if (blk_mq_need_time_stamp(rq))
                now = ktime_get_ns();

        if (rq->rq_flags & RQF_STATS) {
                blk_mq_poll_stats_start(rq->q);
                blk_stat_add(rq, now);
        }

        blk_mq_sched_completed_request(rq, now);

        blk_account_io_done(rq, now);

        if (rq->end_io) {
                rq_qos_done(rq->q, rq);
                rq->end_io(rq, error);
        } else {
                blk_mq_free_request(rq);
        }
}
EXPORT_SYMBOL(__blk_mq_end_request);

void blk_mq_end_request(struct request *rq, blk_status_t error)
{
        if (blk_update_request(rq, error, blk_rq_bytes(rq)))
                BUG();
        __blk_mq_end_request(rq, error);
}
EXPORT_SYMBOL(blk_mq_end_request);

/*
 * Softirq action handler - move entries to local list and loop over them
 * while passing them to the queue registered handler.
 */
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
        struct list_head *cpu_list, local_list;

        local_irq_disable();
        cpu_list = this_cpu_ptr(&blk_cpu_done);
        list_replace_init(cpu_list, &local_list);
        local_irq_enable();

        while (!list_empty(&local_list)) {
                struct request *rq;

                rq = list_entry(local_list.next, struct request, ipi_list);
                list_del_init(&rq->ipi_list);
                rq->q->mq_ops->complete(rq);
        }
}

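/*
 * Queue a completed request on this CPU's blk_cpu_done list and raise
 * BLOCK_SOFTIRQ if the list was previously empty.
 */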
static void blk_mq_trigger_softirq(struct request *rq)
{
        struct list_head *list;
        unsigned long flags;

        local_irq_save(flags);
        list = this_cpu_ptr(&blk_cpu_done);
        list_add_tail(&rq->ipi_list, list);

        /*
         * If the list only contains our just-added request, raise the
         * softirq. If there are already entries there, someone already
         * raised it and it hasn't run yet.
         */
        if (list->next == &rq->ipi_list)
                raise_softirq_irqoff(BLOCK_SOFTIRQ);
        local_irq_restore(flags);
}

static int blk_softirq_cpu_dead(unsigned int cpu)
{
        /*
         * If a CPU goes away, splice its entries to the current CPU
         * and trigger a run of the softirq
         */
        local_irq_disable();
        list_splice_init(&per_cpu(blk_cpu_done, cpu),
                         this_cpu_ptr(&blk_cpu_done));
        raise_softirq_irqoff(BLOCK_SOFTIRQ);
        local_irq_enable();

        return 0;
}


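/* IPI callback, runs on the CPU the request was originally submitted from. */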
static void __blk_mq_complete_request_remote(void *data)
{
        struct request *rq = data;

        /*
         * Most single-queue controllers have only one irq vector for handling
         * I/O completions, and that vector's affinity is set to all possible
         * CPUs. On most architectures this means the irq is handled on one
         * specific CPU.
         *
         * So complete I/O requests in softirq context for single-queue
         * devices to avoid degrading I/O performance due to irqsoff latency.
         */
        if (rq->q->nr_hw_queues == 1)
                blk_mq_trigger_softirq(rq);
        else
                rq->q->mq_ops->complete(rq);
}

static inline bool blk_mq_complete_need_ipi(struct request *rq)
{
        int cpu = raw_smp_processor_id();

        if (!IS_ENABLED(CONFIG_SMP) ||
            !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
                return false;

        /* same CPU or cache domain? Complete locally */
        if (cpu == rq->mq_ctx->cpu ||
            (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
             cpus_share_cache(cpu, rq->mq_ctx->cpu)))
                return false;

        /* don't try to IPI to an offline CPU */
        return cpu_online(rq->mq_ctx->cpu);
}

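/*
 * Try to complete the request on a remote CPU, either via IPI to the
 * submitting CPU or via the block softirq for single hw queue devices.
 * Returns true if the completion was handled here, false if the caller
 * must invoke ->complete() locally.
 */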
bool blk_mq_complete_request_remote(struct request *rq)
{
        WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);

        /*
         * For a polled request, always complete locally; it's pointless
         * to redirect the completion.
         */
        if (rq->cmd_flags & REQ_HIPRI)
                return false;

        if (blk_mq_complete_need_ipi(rq)) {
                rq->csd.func = __blk_mq_complete_request_remote;
                rq->csd.info = rq;
                rq->csd.flags = 0;
                smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
        } else {
                if (rq->q->nr_hw_queues > 1)
                        return false;
                blk_mq_trigger_softirq(rq);
        }

        return true;
}
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);

/**
 * blk_mq_complete_request - end I/O on a request
 * @rq: the request being processed
 *
 * Description:
 *      Complete a request by scheduling the ->complete_rq operation.
 **/
void blk_mq_complete_request(struct request *rq)
{
        if (!blk_mq_complete_request_remote(rq))
                rq->q->mq_ops->complete(rq);
}
EXPORT_SYMBOL(blk_mq_complete_request);

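/*
 * hctx_lock()/hctx_unlock() protect a dispatch section: plain RCU for
 * non-blocking hctxs, SRCU for BLK_MQ_F_BLOCKING ones so that ->queue_rq()
 * may sleep.
 */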
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
        __releases(hctx->srcu)
{
        if (!(hctx->flags & BLK_MQ_F_BLOCKING))
                rcu_read_unlock();
        else
                srcu_read_unlock(hctx->srcu, srcu_idx);
}

static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
        __acquires(hctx->srcu)
{
        if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
                /* shut up gcc false positive */
                *srcu_idx = 0;
                rcu_read_lock();
        } else
                *srcu_idx = srcu_read_lock(hctx->srcu);
}

/**
 * blk_mq_start_request - Start processing a request
 * @rq: Pointer to request to be started
 *
 * Used by device drivers to notify the block layer that a request is about
 * to be processed, so the block layer can do the proper initializations such
 * as starting the timeout timer.
 */
void blk_mq_start_request(struct request *rq)
{
        struct request_queue *q = rq->q;

        trace_block_rq_issue(q, rq);

        if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
                rq->io_start_time_ns = ktime_get_ns();
                rq->stats_sectors = blk_rq_sectors(rq);
                rq->rq_flags |= RQF_STATS;
                rq_qos_issue(q, rq);
        }

        WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);

        blk_add_timer(rq);
        WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);

#ifdef CONFIG_BLK_DEV_INTEGRITY
        if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
                q->integrity.profile->prepare_fn(rq);
#endif
}
EXPORT_SYMBOL(blk_mq_start_request);

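/*
 * Put the request back into an idle, un-issued state so it can be dispatched
 * again later: drop the driver tag and reset the state machine.
 */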
static void __blk_mq_requeue_request(struct request *rq)
{
        struct request_queue *q = rq->q;

        blk_mq_put_driver_tag(rq);

        trace_block_rq_requeue(q, rq);
        rq_qos_requeue(q, rq);

        if (blk_mq_request_started(rq)) {
                WRITE_ONCE(rq->state, MQ_RQ_IDLE);
                rq->rq_flags &= ~RQF_TIMED_OUT;
        }
}

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
        __blk_mq_requeue_request(rq);

        /* this request will be re-inserted to io scheduler queue */
        blk_mq_sched_requeue_request(rq);

        blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
EXPORT_SYMBOL(blk_mq_requeue_request);

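/*
 * Workqueue handler that drains q->requeue_list: RQF_DONTPREP requests go
 * straight to the hctx dispatch list to avoid merging, RQF_SOFTBARRIER
 * requests are inserted at the head of the scheduler queue, and everything
 * else is re-inserted at the tail.
 */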
static void blk_mq_requeue_work(struct work_struct *work)
{
        struct request_queue *q =
                container_of(work, struct request_queue, requeue_work.work);
        LIST_HEAD(rq_list);
        struct request *rq, *next;

        spin_lock_irq(&q->requeue_lock);
        list_splice_init(&q->requeue_list, &rq_list);
        spin_unlock_irq(&q->requeue_lock);

        list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
                if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
                        continue;

                rq->rq_flags &= ~RQF_SOFTBARRIER;
                list_del_init(&rq->queuelist);
                /*
                 * If RQF_DONTPREP is set, the rq already contains driver
                 * specific data, so insert it into the hctx dispatch list
                 * to avoid any merge.
                 */
                if (rq->rq_flags & RQF_DONTPREP)
                        blk_mq_request_bypass_insert(rq, false, false);
                else
                        blk_mq_sched_insert_request(rq, true, false, false);
        }

        while (!list_empty(&rq_list)) {
                rq = list_entry(rq_list.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                blk_mq_sched_insert_request(rq, false, false, false);
        }

        blk_mq_run_hw_queues(q, false);
}

void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
                                bool kick_requeue_list)
{
        struct request_queue *q = rq->q;
        unsigned long flags;

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) * We abuse this flag, which is otherwise used by the I/O scheduler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * to request head insertion from the requeue workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) spin_lock_irqsave(&q->requeue_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (at_head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) rq->rq_flags |= RQF_SOFTBARRIER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) list_add(&rq->queuelist, &q->requeue_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) list_add_tail(&rq->queuelist, &q->requeue_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) spin_unlock_irqrestore(&q->requeue_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (kick_requeue_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) blk_mq_kick_requeue_list(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
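/*
 * Kick the requeue work so that requeued requests are processed as soon as
 * possible; the work runs on an unbound workqueue CPU with no delay.
 */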
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) void blk_mq_kick_requeue_list(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) EXPORT_SYMBOL(blk_mq_kick_requeue_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) void blk_mq_delay_kick_requeue_list(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) unsigned long msecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) msecs_to_jiffies(msecs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
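/*
 * Look up the request currently stored for @tag in @tags; returns NULL if
 * @tag is out of range for this tag set.
 */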
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) if (tag < tags->nr_tags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) prefetch(tags->rqs[tag]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) return tags->rqs[tag];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) EXPORT_SYMBOL(blk_mq_tag_to_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) void *priv, bool reserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * If we find a request that isn't idle and the queue matches,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * we know the queue is busy. Return false to stop the iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) bool *busy = priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) *busy = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
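/*
 * Check whether @q has any request that the driver has started but not yet
 * completed. Iterates the busy tags and stops as soon as one in-flight
 * request belonging to this queue is found.
 */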
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) bool blk_mq_queue_inflight(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) bool busy = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) return busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
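/*
 * Handle an expired request: mark it RQF_TIMED_OUT and give the driver's
 * ->timeout() handler a chance to deal with it. Unless that handler returns
 * BLK_EH_DONE, the request's timer is re-armed; the same happens when the
 * driver has no ->timeout() handler at all.
 */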
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) static void blk_mq_rq_timed_out(struct request *req, bool reserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) req->rq_flags |= RQF_TIMED_OUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) if (req->q->mq_ops->timeout) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) enum blk_eh_timer_return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) ret = req->q->mq_ops->timeout(req, reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) if (ret == BLK_EH_DONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) blk_add_timer(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
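/*
 * Check whether @rq has been in flight longer than its deadline. Requests
 * that are not in flight or that already timed out are ignored. For a
 * request that has not expired yet, remember the earliest deadline seen in
 * *next so the caller can re-arm the queue timeout accordingly.
 */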
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) unsigned long deadline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (rq->rq_flags & RQF_TIMED_OUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) deadline = READ_ONCE(rq->deadline);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (time_after_eq(jiffies, deadline))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) if (*next == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) *next = deadline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) else if (time_after(*next, deadline))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) *next = deadline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
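/*
 * Drop a reference on @rq: flush requests are handed to their ->end_io
 * handler, everything else is freed once the last reference is gone.
 */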
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) void blk_mq_put_rq_ref(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) if (is_flush_rq(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) rq->end_io(rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) else if (refcount_dec_and_test(&rq->ref))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) __blk_mq_free_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) struct request *rq, void *priv, bool reserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) unsigned long *next = priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) * be reallocated underneath the timeout handler's processing, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) * the expiry check is therefore reliable. If the request is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * expired, it can only be completed and reallocated as a new request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) * after blk_mq_check_expired() returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (blk_mq_req_expired(rq, next))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) blk_mq_rq_timed_out(rq, reserved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) static void blk_mq_timeout_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) struct request_queue *q =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) container_of(work, struct request_queue, timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) unsigned long next = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) /* A deadlock might occur if a request is stuck requiring a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) * timeout at the same time a queue freeze is waiting for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * completion, since the timeout code would not be able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * acquire the queue reference here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * That's why we don't use blk_queue_enter here; instead, we use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * percpu_ref_tryget directly, because we need to be able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * obtain a reference even in the short window between the queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * starting to freeze, by dropping the first reference in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * blk_freeze_queue_start, and the moment the last request is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * consumed, marked by the instant q_usage_counter reaches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) * zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (!percpu_ref_tryget(&q->q_usage_counter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (next != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) mod_timer(&q->timeout, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) * Request timeouts are handled as a forward rolling timer. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) * we end up here it means that no requests are pending and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * also that no request has been pending for a while. Mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * each hctx as idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) /* the hctx may be unmapped, so check it here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) if (blk_mq_hw_queue_mapped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) blk_mq_tag_idle(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) blk_queue_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) struct flush_busy_ctx_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct list_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) struct flush_busy_ctx_data *flush_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) struct blk_mq_hw_ctx *hctx = flush_data->hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) enum hctx_type type = hctx->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) spin_lock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) sbitmap_clear_bit(sb, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) spin_unlock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * Process software queues that have been marked busy, splicing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * requests they hold onto the passed-in dispatch list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) struct flush_busy_ctx_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) .hctx = hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) .list = list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) struct dispatch_rq_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) struct dispatch_rq_data *dispatch_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) enum hctx_type type = hctx->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) spin_lock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (!list_empty(&ctx->rq_lists[type])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) list_del_init(&dispatch_data->rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) if (list_empty(&ctx->rq_lists[type]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) sbitmap_clear_bit(sb, bitnr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) spin_unlock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) return !dispatch_data->rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
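/*
 * Dequeue a single request from the software queues of @hctx. The scan
 * starts at @start's software queue (or the first one when @start is NULL)
 * and wraps around; returns NULL if every software queue is empty.
 */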
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) struct blk_mq_ctx *start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) unsigned off = start ? start->index_hw[hctx->type] : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) struct dispatch_rq_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) .hctx = hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) .rq = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) __sbitmap_for_each_set(&hctx->ctx_map, off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) dispatch_rq_from_ctx, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) return data.rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
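/*
 * Map the number of requests dispatched in one batch to a bucket index for
 * the hctx->dispatched[] statistics: 0 for an empty batch, otherwise
 * ilog2(queued) + 1 capped at BLK_MQ_MAX_DISPATCH_ORDER - 1.
 */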
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) static inline unsigned int queued_to_index(unsigned int queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) if (!queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
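/*
 * Try to allocate a driver tag for @rq straight from the tag bitmap.
 * Requests carrying a reserved scheduler tag allocate from the reserved
 * pool; all others use the normal bitmap, subject to the fairness limit
 * enforced by hctx_may_queue() for shared tag sets.
 */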
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) static bool __blk_mq_get_driver_tag(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) int tag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) blk_mq_tag_busy(rq->mq_hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) bt = rq->mq_hctx->tags->breserved_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) tag_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (!hctx_may_queue(rq->mq_hctx, bt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) tag = __sbitmap_queue_get(bt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) if (tag == BLK_MQ_NO_TAG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) rq->tag = tag + tag_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
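/*
 * Make sure @rq owns a driver tag, allocating one if necessary. On success
 * the request is accounted as active when the tag set is shared between
 * queues (RQF_MQ_INFLIGHT) and published in hctx->tags->rqs[] so tag
 * iteration can find it.
 */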
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) static bool blk_mq_get_driver_tag(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) rq->rq_flags |= RQF_MQ_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) __blk_mq_inc_active_requests(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) hctx->tags->rqs[rq->tag] = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
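/*
 * Wait-queue callback invoked when tags are freed while this hctx is waiting
 * for one: remove the hctx from the tag wait queue, drop the ws_active count
 * and kick an asynchronous run of the hardware queue.
 */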
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) int flags, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) spin_lock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) if (!list_empty(&wait->entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) struct sbitmap_queue *sbq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) sbq = hctx->tags->bitmap_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) atomic_dec(&sbq->ws_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) spin_unlock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) blk_mq_run_hw_queue(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * Mark us waiting for a tag. For shared tags, this involves hooking us into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * the tag wakeups. For non-shared tags, we can simply mark ourselves as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) * needing a restart. In both cases, take care to check the condition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * again after marking us as waiting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) struct sbitmap_queue *sbq = hctx->tags->bitmap_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) struct wait_queue_head *wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) wait_queue_entry_t *wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) blk_mq_sched_mark_restart_hctx(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * It's possible that a tag was freed in the window between the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * allocation failure and adding the hardware queue to the wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * Don't clear RESTART here, someone else could have set it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * At most this will cost an extra queue run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) return blk_mq_get_driver_tag(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) wait = &hctx->dispatch_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (!list_empty_careful(&wait->entry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) wq = &bt_wait_ptr(sbq, hctx)->wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) spin_lock_irq(&wq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) spin_lock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (!list_empty(&wait->entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) spin_unlock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) spin_unlock_irq(&wq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) atomic_inc(&sbq->ws_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) wait->flags &= ~WQ_FLAG_EXCLUSIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) __add_wait_queue(wq, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * It's possible that a tag was freed in the window between the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * allocation failure and adding the hardware queue to the wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) * queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) ret = blk_mq_get_driver_tag(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) spin_unlock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) spin_unlock_irq(&wq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * We got a tag, remove ourselves from the wait queue to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * someone else gets the wakeup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) atomic_dec(&sbq->ws_active);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) spin_unlock(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) spin_unlock_irq(&wq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) #define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) #define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * Update dispatch_busy with an Exponentially Weighted Moving Average (EWMA):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * - EWMA is a simple way to maintain a running average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * - weights of 7/8 (old value) and 1/8 (new sample) make it decay exponentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) * - the factor of 4 (a busy sample counts as 1 << 4) keeps the result from rounding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) *   down to 0 too soon; since the EWMA decays exponentially, its exact value matters little
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) */
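/*
 * Worked example of the update below, ewma' = (ewma * 7 + (busy ? 16 : 0)) / 8
 * with integer division: starting from 0, consecutive busy samples give
 * 0 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 and then stay at 9, while
 * consecutive idle samples decay the value back down to 0.
 */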
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) unsigned int ewma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) ewma = hctx->dispatch_busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (!ewma && !busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) hctx->dispatch_busy = ewma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) static void blk_mq_handle_dev_resource(struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) struct request *next =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) list_first_entry_or_null(list, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * If an I/O scheduler has been configured and we got a driver tag for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) * the next request already, free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) blk_mq_put_driver_tag(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) list_add(&rq->queuelist, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) __blk_mq_requeue_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) static void blk_mq_handle_zone_resource(struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) struct list_head *zone_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * If we end up here, it is because we cannot dispatch a request to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * specific zone due to LLD-level zone-write locking or some other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * zone-related resource not being available. In this case, set the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) * request aside in zone_list so it can be retried later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) list_add(&rq->queuelist, zone_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) __blk_mq_requeue_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) enum prep_dispatch {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) PREP_DISPATCH_OK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) PREP_DISPATCH_NO_TAG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) PREP_DISPATCH_NO_BUDGET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
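/*
 * Prepare one request for dispatch: obtain a dispatch budget (unless the
 * caller pre-allocated budgets) and a driver tag. On failure any resource
 * already taken is released and the return value tells the caller whether
 * the tag or the budget was missing.
 */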
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) bool need_budget)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) blk_mq_put_driver_tag(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) return PREP_DISPATCH_NO_BUDGET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (!blk_mq_get_driver_tag(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) * The initial allocation attempt failed, so we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) * rerun the hardware queue when a tag is freed. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) * waitqueue takes care of that. If the queue is run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) * before we add this entry back on the dispatch list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) * we'll re-run it below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (!blk_mq_mark_tag_wait(hctx, rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * Budgets that were not acquired by this function are released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * together later, when the partial dispatch is handled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) if (need_budget)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) blk_mq_put_dispatch_budget(rq->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) return PREP_DISPATCH_NO_TAG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) return PREP_DISPATCH_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) /* Release budgets that were allocated before the call to blk_mq_dispatch_rq_list() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static void blk_mq_release_budgets(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) unsigned int nr_budgets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) for (i = 0; i < nr_budgets; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) blk_mq_put_dispatch_budget(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) * Returns true if we did some work AND can potentially do more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) unsigned int nr_budgets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) enum prep_dispatch prep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) struct request_queue *q = hctx->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) struct request *rq, *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) int errors, queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) blk_status_t ret = BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) LIST_HEAD(zone_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) bool needs_resource = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) if (list_empty(list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) * Now process all the entries, sending them to the driver.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) errors = queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) struct blk_mq_queue_data bd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) rq = list_first_entry(list, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) WARN_ON_ONCE(hctx != rq->mq_hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (prep != PREP_DISPATCH_OK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) list_del_init(&rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) bd.rq = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * Set bd.last if we have no more requests, or if we have more but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) * cannot assign a driver tag to the next one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (list_empty(list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) bd.last = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) nxt = list_first_entry(list, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) bd.last = !blk_mq_get_driver_tag(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) * Once the request is queued to the LLD, this budget is consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) * and must not be released here any more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) if (nr_budgets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) nr_budgets--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) ret = q->mq_ops->queue_rq(hctx, &bd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) case BLK_STS_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) queued++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) case BLK_STS_RESOURCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) needs_resource = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) case BLK_STS_DEV_RESOURCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) blk_mq_handle_dev_resource(rq, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) case BLK_STS_ZONE_RESOURCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) * Move the request to zone_list and keep going through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) * the dispatch list to find more requests the drive can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) * accept.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) blk_mq_handle_zone_resource(rq, &zone_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) needs_resource = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) blk_mq_end_request(rq, BLK_STS_IOERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) } while (!list_empty(list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) if (!list_empty(&zone_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) list_splice_tail_init(&zone_list, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) hctx->dispatched[queued_to_index(queued)]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) /* If we didn't flush the entire list, we could have told the driver
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) * there was more coming, but that turned out to be a lie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) q->mq_ops->commit_rqs(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * Any items that need requeuing? Stuff them into hctx->dispatch,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * that is where we will continue on next queue run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) if (!list_empty(list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) bool needs_restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) /* For non-shared tags, the RESTART check will suffice */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) blk_mq_release_budgets(q, nr_budgets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) spin_lock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) list_splice_tail_init(list, &hctx->dispatch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) spin_unlock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * Order adding requests to hctx->dispatch against checking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * SCHED_RESTART flag. The pair of this smp_mb() is the one in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * blk_mq_sched_restart(). It keeps the restart code path from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) * missing the newly added requests on hctx->dispatch while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) * SCHED_RESTART is observed here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * If SCHED_RESTART was set by the caller of this function and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * it is no longer set that means that it was cleared by another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * thread and hence that a queue rerun is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) * If 'no_tag' is set, that means that we failed getting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) * a driver tag with an I/O scheduler attached. If our dispatch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) * waitqueue is no longer active, ensure that we run the queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) * AFTER adding our entries back to the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * If no I/O scheduler has been configured it is possible that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * the hardware queue got stopped and restarted before requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * were pushed back onto the dispatch list. Rerun the queue to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) * avoid starvation. Notes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * - blk_mq_run_hw_queue() checks whether or not a queue has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) * been stopped before rerunning a queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * - Some but not all block drivers stop a queue before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * and dm-rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * If the driver returns BLK_STS_RESOURCE and the SCHED_RESTART
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * bit is set, run the queue after a delay to avoid IO stalls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * that could otherwise occur if the queue is idle. We do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * same if we could not get a budget or could not lock a zone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * and SCHED_RESTART is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) needs_restart = blk_mq_sched_needs_restart(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) if (prep == PREP_DISPATCH_NO_BUDGET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) needs_resource = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (!needs_restart ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) blk_mq_run_hw_queue(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) else if (needs_restart && needs_resource)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) blk_mq_update_dispatch_busy(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) blk_mq_update_dispatch_busy(hctx, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) return (queued + errors) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) * __blk_mq_run_hw_queue - Run a hardware queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) * @hctx: Pointer to the hardware queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) * Send pending requests to the hardware.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) int srcu_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * We should be running this queue from one of the CPUs that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * are mapped to it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * There are at least two related races now between setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) * hctx->next_cpu from blk_mq_hctx_next_cpu() and running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * __blk_mq_run_hw_queue():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) *   but later it becomes online; in that case the warning is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) *   entirely harmless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) *   but later it becomes offline; then the warning cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) *   triggered, and we rely on the blk-mq timeout handler to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) *   deal with requests already dispatched to this hctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) cpu_online(hctx->next_cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) raw_smp_processor_id(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) cpumask_empty(hctx->cpumask) ? "inactive": "active");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * We can't run the queue inline with ints disabled. Ensure that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * we catch bad users of this early.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) WARN_ON_ONCE(in_interrupt());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) hctx_lock(hctx, &srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) blk_mq_sched_dispatch_requests(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) hctx_unlock(hctx, srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
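/*
 * Return the first online CPU mapped to @hctx, falling back to the first
 * mapped CPU if none of them are currently online.
 */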
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) if (cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) cpu = cpumask_first(hctx->cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * It'd be great if the workqueue API had a way to pass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * in a mask and had some smarts for more clever placement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) * For now we just round-robin here, switching for every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * BLK_MQ_CPU_WORK_BATCH queued items.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) bool tried = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) int next_cpu = hctx->next_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (hctx->queue->nr_hw_queues == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) return WORK_CPU_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) if (--hctx->next_cpu_batch <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) select_cpu:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) cpu_online_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (next_cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) next_cpu = blk_mq_first_mapped_cpu(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) * Do an unbound schedule if we can't find an online CPU for this hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) * which should only happen while handling the CPU DEAD hotplug event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) if (!cpu_online(next_cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (!tried) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) tried = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) goto select_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * Make sure a CPU is re-selected the next time, once the CPUs in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * hctx->cpumask come back online.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) hctx->next_cpu = next_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) hctx->next_cpu_batch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) return WORK_CPU_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) hctx->next_cpu = next_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) return next_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * @hctx: Pointer to the hardware queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * @async: If we want to run the queue asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * @msecs: Milliseconds of delay to wait before running the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * If !@async, try to run the queue now. Else, run the queue asynchronously and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) * with a delay of @msecs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) unsigned long msecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (unlikely(blk_mq_hctx_stopped(hctx)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) int cpu = get_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) if (cpumask_test_cpu(cpu, hctx->cpumask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) __blk_mq_run_hw_queue(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) msecs_to_jiffies(msecs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * @hctx: Pointer to the hardware queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * @msecs: Milliseconds of delay to wait before running the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * Run a hardware queue asynchronously with a delay of @msecs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) __blk_mq_delay_run_hw_queue(hctx, true, msecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
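/*
 * Illustrative sketch only (not part of this file's logic): a driver whose
 * ->queue_rq() handler is temporarily out of resources can ask for the
 * hardware queue to be retried a little later instead of busy-looping.
 * The foo_* helpers, foo_dev, and the 3 ms delay are made-up example values.
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		struct foo_dev *foo = hctx->queue->queuedata;
 *
 *		if (!foo_hw_has_room(foo)) {
 *			// re-run this hctx in ~3 ms; blk-mq re-dispatches rq then
 *			blk_mq_delay_run_hw_queue(hctx, 3);
 *			return BLK_STS_RESOURCE;
 *		}
 *
 *		blk_mq_start_request(bd->rq);
 *		return foo_hw_submit(foo, bd->rq);
 *	}
 */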
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * blk_mq_run_hw_queue - Start to run a hardware queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * @hctx: Pointer to the hardware queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * @async: If we want to run the queue asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) * Check if the request queue is not in a quiesced state and if there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * pending requests to be sent. If this is true, run the queue to send requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * to hardware.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) int srcu_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) bool need_run;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) * When the queue is quiesced, we may be switching the io scheduler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) * updating nr_hw_queues, or doing other work, so the queue can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) * run any more; even __blk_mq_hctx_has_pending() can't be called safely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) * A quiesced queue will be rerun when blk_mq_unquiesce_queue() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) * called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) hctx_lock(hctx, &srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) need_run = !blk_queue_quiesced(hctx->queue) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) blk_mq_hctx_has_pending(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) hctx_unlock(hctx, srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) if (need_run)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) __blk_mq_delay_run_hw_queue(hctx, async, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) EXPORT_SYMBOL(blk_mq_run_hw_queue);
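/*
 * Illustrative sketch only (not part of this file's logic): once a driver
 * has freed the resource that made it back off earlier, it can kick the
 * hardware queue so pending requests get dispatched again.  The
 * foo_complete_rq() name is made up.
 *
 *	static void foo_complete_rq(struct request *rq)
 *	{
 *		struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 *
 *		blk_mq_end_request(rq, BLK_STS_OK);
 *		// run asynchronously: this may be called from irq context
 *		blk_mq_run_hw_queue(hctx, true);
 *	}
 */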
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) * Is the request queue handled by an IO scheduler that does not respect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * hardware queues when dispatching?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) static bool blk_mq_has_sqsched(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) struct elevator_queue *e = q->elevator;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) if (e && e->type->ops.dispatch_request &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * Return the preferred queue to dispatch from (if any) for a non-mq-aware
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * IO scheduler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) * If the IO scheduler does not respect hardware queues when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) * dispatching, we just don't bother with multiple HW queues and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * dispatch from the hctx for the current CPU, since running multiple
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) * queues just causes lock contention inside the scheduler and pointless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) * cache bouncing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) raw_smp_processor_id());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) if (!blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) return hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * @q: Pointer to the request queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) * @async: If we want to run the queue asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) void blk_mq_run_hw_queues(struct request_queue *q, bool async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) struct blk_mq_hw_ctx *hctx, *sq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) sq_hctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) if (blk_mq_has_sqsched(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) sq_hctx = blk_mq_get_sq_hctx(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) if (blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * Dispatch from this hctx either if there's no hctx preferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) * by IO scheduler or if it has requests that bypass the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) * scheduler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) if (!sq_hctx || sq_hctx == hctx ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) !list_empty_careful(&hctx->dispatch))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) blk_mq_run_hw_queue(hctx, async);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) EXPORT_SYMBOL(blk_mq_run_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) * @q: Pointer to the request queue to run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) * @msecs: Milliseconds of delay to wait before running the queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) struct blk_mq_hw_ctx *hctx, *sq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) sq_hctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (blk_mq_has_sqsched(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) sq_hctx = blk_mq_get_sq_hctx(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) if (blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) * Dispatch from this hctx either if there's no hctx preferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) * by IO scheduler or if it has requests that bypass the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) * scheduler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) if (!sq_hctx || sq_hctx == hctx ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) !list_empty_careful(&hctx->dispatch))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) blk_mq_delay_run_hw_queue(hctx, msecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * @q: request queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) * The caller is responsible for serializing this function against
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) * blk_mq_{start,stop}_hw_queue().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) bool blk_mq_queue_stopped(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) queue_for_each_hw_ctx(q, hctx, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) EXPORT_SYMBOL(blk_mq_queue_stopped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * This function is often used by a driver to pause .queue_rq() when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) * there aren't enough resources or some conditions aren't satisfied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * in which case BLK_STS_RESOURCE is usually returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * We do not guarantee that dispatch can be drained or blocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * after blk_mq_stop_hw_queue() returns. Please use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * blk_mq_quiesce_queue() for that requirement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) cancel_delayed_work(&hctx->run_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) set_bit(BLK_MQ_S_STOPPED, &hctx->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) EXPORT_SYMBOL(blk_mq_stop_hw_queue);
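/*
 * Illustrative sketch only (not part of this file's logic): the usual
 * pairing with blk_mq_start_stopped_hw_queues() when a device ring fills
 * up, roughly the shape used by drivers such as virtio_blk.  The foo_*
 * names are made up.
 *
 *	// in ->queue_rq(): the device ring is full, so pause this hctx
 *	if (foo_ring_full(foo)) {
 *		blk_mq_stop_hw_queue(hctx);
 *		return BLK_STS_DEV_RESOURCE;
 *	}
 *
 *	// later, in the completion interrupt, ring space has been freed
 *	blk_mq_start_stopped_hw_queues(foo->queue, true);
 */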
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) * This function is often used by a driver to pause .queue_rq() when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) * there aren't enough resources or some conditions aren't satisfied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) * in which case BLK_STS_RESOURCE is usually returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * We do not guarantee that dispatch can be drained or blocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) * after blk_mq_stop_hw_queues() returns. Please use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * blk_mq_quiesce_queue() for that requirement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) void blk_mq_stop_hw_queues(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) queue_for_each_hw_ctx(q, hctx, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) blk_mq_stop_hw_queue(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) EXPORT_SYMBOL(blk_mq_stop_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) blk_mq_run_hw_queue(hctx, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) EXPORT_SYMBOL(blk_mq_start_hw_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) void blk_mq_start_hw_queues(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) queue_for_each_hw_ctx(q, hctx, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) blk_mq_start_hw_queue(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) EXPORT_SYMBOL(blk_mq_start_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) if (!blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) blk_mq_run_hw_queue(hctx, async);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) queue_for_each_hw_ctx(q, hctx, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) blk_mq_start_stopped_hw_queue(hctx, async);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static void blk_mq_run_work_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) * If we are stopped, don't run the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) if (blk_mq_hctx_stopped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) __blk_mq_run_hw_queue(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) bool at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) struct blk_mq_ctx *ctx = rq->mq_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) enum hctx_type type = hctx->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) lockdep_assert_held(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) trace_block_rq_insert(hctx->queue, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) if (at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) list_add(&rq->queuelist, &ctx->rq_lists[type]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) bool at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) struct blk_mq_ctx *ctx = rq->mq_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) lockdep_assert_held(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) __blk_mq_insert_req_list(hctx, rq, at_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) blk_mq_hctx_mark_pending(hctx, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * blk_mq_request_bypass_insert - Insert a request at dispatch list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) * @rq: Pointer to request to be inserted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * @at_head: true if the request should be inserted at the head of the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) * @run_queue: If we should run the hardware queue after inserting the request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * Use this carefully and only when the caller knows we want to bypass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * a potential IO scheduler on the target device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) bool run_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) spin_lock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) if (at_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) list_add(&rq->queuelist, &hctx->dispatch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) list_add_tail(&rq->queuelist, &hctx->dispatch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) spin_unlock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) if (run_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) blk_mq_run_hw_queue(hctx, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) enum hctx_type type = hctx->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * Preemption doesn't flush the plug list, so it's possible that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) * ctx->cpu is offline now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) list_for_each_entry(rq, list, queuelist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) BUG_ON(rq->mq_ctx != ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) trace_block_rq_insert(hctx->queue, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) spin_lock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) list_splice_tail_init(list, &ctx->rq_lists[type]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) blk_mq_hctx_mark_pending(hctx, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) spin_unlock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) struct request *rqa = container_of(a, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) struct request *rqb = container_of(b, struct request, queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) if (rqa->mq_ctx != rqb->mq_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) return rqa->mq_ctx > rqb->mq_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) if (rqa->mq_hctx != rqb->mq_hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) return rqa->mq_hctx > rqb->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) return blk_rq_pos(rqa) > blk_rq_pos(rqb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) if (list_empty(&plug->mq_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) list_splice_init(&plug->mq_list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) if (plug->rq_count > 2 && plug->multiple_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) list_sort(NULL, &list, plug_rq_cmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) plug->rq_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) struct list_head rq_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) struct request *rq, *head_rq = list_entry_rq(list.next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) struct list_head *pos = &head_rq->queuelist; /* skip first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) unsigned int depth = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) list_for_each_continue(pos, &list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) rq = list_entry_rq(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) BUG_ON(!rq->q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) depth++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) list_cut_before(&rq_list, &list, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) trace_block_unplug(head_rq->q, depth, !from_schedule);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) from_schedule);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) } while (!list_empty(&list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) }
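/*
 * Illustrative sketch only (not part of this file's logic): how requests
 * end up on plug->mq_list and get flushed here.  A submitter batches bios
 * under a plug; when the plug is finished (or grows past the per-plug
 * limits), the plugged requests are handed back to blk-mq via
 * blk_mq_flush_plug_list().  bio1/bio2 stand for bios the caller built.
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	submit_bio(bio1);	// held on the plug list, not dispatched yet
 *	submit_bio(bio2);	// may be merged with bio1 while plugged
 *	blk_finish_plug(&plug);	// flushes the plug list to the hardware queues
 */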
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) unsigned int nr_segs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) if (bio->bi_opf & REQ_RAHEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) rq->cmd_flags |= REQ_FAILFAST_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) rq->__sector = bio->bi_iter.bi_sector;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) rq->write_hint = bio->bi_write_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) blk_rq_bio_prep(rq, bio, nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) WARN_ON_ONCE(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) blk_account_io_start(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) blk_qc_t *cookie, bool last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) struct request_queue *q = rq->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) struct blk_mq_queue_data bd = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) .rq = rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) .last = last,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) blk_qc_t new_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) new_cookie = request_to_qc_t(hctx, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) * If queueing succeeded (BLK_STS_OK), we are done. For a hard error,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) * the caller may kill the request. For any other (busy) error, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) * add it back to our list as we previously would have done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) ret = q->mq_ops->queue_rq(hctx, &bd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) case BLK_STS_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) blk_mq_update_dispatch_busy(hctx, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) *cookie = new_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) case BLK_STS_RESOURCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) case BLK_STS_DEV_RESOURCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) blk_mq_update_dispatch_busy(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) __blk_mq_requeue_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) blk_mq_update_dispatch_busy(hctx, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) *cookie = BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) blk_qc_t *cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) bool bypass_insert, bool last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) struct request_queue *q = rq->q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) bool run_queue = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) * RCU or SRCU read lock is needed before checking quiesced flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) * When the queue is stopped or quiesced, ignore 'bypass_insert' from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) * blk_mq_request_issue_directly() and return BLK_STS_OK to the caller,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) * so that the driver does not try to dispatch again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) run_queue = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) bypass_insert = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) if (q->elevator && !bypass_insert)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (!blk_mq_get_dispatch_budget(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) if (!blk_mq_get_driver_tag(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) blk_mq_put_dispatch_budget(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) return __blk_mq_issue_directly(hctx, rq, cookie, last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) insert:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) if (bypass_insert)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) return BLK_STS_RESOURCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) blk_mq_sched_insert_request(rq, false, run_queue, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) return BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * blk_mq_try_issue_directly - Try to send a request directly to device driver.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * @hctx: Pointer of the associated hardware queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * @rq: Pointer to request to be sent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * @cookie: Request queue cookie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) * If the device has enough resources to accept a new request now, send the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) * request directly to the device driver. Else, insert it at the hctx->dispatch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) * queue, so we can try to send it again in the future. Requests inserted at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) * this queue have higher priority.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) struct request *rq, blk_qc_t *cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) int srcu_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) hctx_lock(hctx, &srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) blk_mq_request_bypass_insert(rq, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) else if (ret != BLK_STS_OK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) blk_mq_end_request(rq, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) hctx_unlock(hctx, srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) int srcu_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) blk_qc_t unused_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) hctx_lock(hctx, &srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) hctx_unlock(hctx, srcu_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) struct list_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) int queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) int errors = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) while (!list_empty(list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) struct request *rq = list_first_entry(list, struct request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) list_del_init(&rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) ret = blk_mq_request_issue_directly(rq, list_empty(list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) if (ret != BLK_STS_OK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (ret == BLK_STS_RESOURCE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) ret == BLK_STS_DEV_RESOURCE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) blk_mq_request_bypass_insert(rq, false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) list_empty(list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) blk_mq_end_request(rq, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) errors++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) queued++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) * If we didn't flush the entire list, we could have told
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) * the driver there was more coming, but that turned out to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) * be a lie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) if ((!list_empty(list) || errors) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) hctx->queue->mq_ops->commit_rqs && queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) hctx->queue->mq_ops->commit_rqs(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) }
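/*
 * Illustrative sketch only (not part of this file's logic): how a driver
 * can use bd->last together with a ->commit_rqs() hook to batch doorbell
 * writes, roughly the pattern nvme-style drivers follow.  The foo_*
 * helpers are made up.
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		blk_mq_start_request(bd->rq);
 *		foo_write_sq_entry(hctx->driver_data, bd->rq);
 *		if (bd->last)
 *			foo_ring_doorbell(hctx->driver_data);	// flush the batch
 *		return BLK_STS_OK;
 *	}
 *
 *	static void foo_commit_rqs(struct blk_mq_hw_ctx *hctx)
 *	{
 *		// requests queued with bd->last == false were actually the
 *		// end of the batch, so flush them now
 *		foo_ring_doorbell(hctx->driver_data);
 *	}
 */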
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) list_add_tail(&rq->queuelist, &plug->mq_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) plug->rq_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) struct request *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) tmp = list_first_entry(&plug->mq_list, struct request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) if (tmp->q != rq->q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) plug->multiple_queues = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) * queues. This is important for md arrays to benefit from merging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) if (plug->multiple_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) return BLK_MAX_REQUEST_COUNT * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) return BLK_MAX_REQUEST_COUNT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * blk_mq_submit_bio - Create and send a request to block device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * @bio: Bio pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * Builds up a request structure from @bio and sends it to the device. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * request may not be queued directly to hardware if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * * This request can be merged with another one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * * We want to place the request at the plug queue for possible future merging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * * There is an IO scheduler active at this queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * It will not queue the request if there is an error with the bio or if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * request creation fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) * Returns: Request queue cookie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) blk_qc_t blk_mq_submit_bio(struct bio *bio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) struct request_queue *q = bio->bi_disk->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) const int is_sync = op_is_sync(bio->bi_opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) const int is_flush_fua = op_is_flush(bio->bi_opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) struct blk_mq_alloc_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) .q = q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) struct blk_plug *plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) struct request *same_queue_rq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) unsigned int nr_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) blk_qc_t cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) blk_queue_bounce(q, &bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) __blk_queue_split(&bio, &nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) if (!bio_integrity_prep(bio))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) goto queue_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) if (!is_flush_fua && !blk_queue_nomerges(q) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) goto queue_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) if (blk_mq_sched_bio_merge(q, bio, nr_segs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) goto queue_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) rq_qos_throttle(q, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) data.cmd_flags = bio->bi_opf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) rq = __blk_mq_alloc_request(&data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) if (unlikely(!rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) rq_qos_cleanup(q, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (bio->bi_opf & REQ_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) bio_wouldblock_error(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) goto queue_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) trace_block_getrq(q, bio, bio->bi_opf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) rq_qos_track(q, rq, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) cookie = request_to_qc_t(data.hctx, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) blk_mq_bio_to_request(rq, bio, nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) ret = blk_crypto_init_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) if (ret != BLK_STS_OK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) bio->bi_status = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) bio_endio(bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) blk_mq_free_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) plug = blk_mq_plug(q, bio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) if (unlikely(is_flush_fua)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) /* Bypass scheduler for flush requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) blk_insert_flush(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) blk_mq_run_hw_queue(data.hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) } else if (plug && (q->nr_hw_queues == 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * Use plugging if we have a ->commit_rqs() hook as well, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * we know the driver uses bd->last in a smart fashion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * Use normal plugging if this disk is a slow HDD, as sequential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) * IO may benefit a lot from plug merging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) unsigned int request_count = plug->rq_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) struct request *last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) if (!request_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) trace_block_plug(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) last = list_entry_rq(plug->mq_list.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) if (request_count >= blk_plug_max_rq_count(plug) || (last &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) blk_flush_plug_list(plug, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) trace_block_plug(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) blk_add_rq_to_plug(plug, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) } else if (q->elevator) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) /* Insert the request at the IO scheduler queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) blk_mq_sched_insert_request(rq, false, true, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) } else if (plug && !blk_queue_nomerges(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) * We do limited plugging. If the bio can be merged, do that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) * Otherwise the existing request in the plug list will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * issued. So the plug list will have one request at most.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * The plug list might get flushed before this. If that happens,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) * the plug list is empty, and same_queue_rq is invalid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) if (list_empty(&plug->mq_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) same_queue_rq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) if (same_queue_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) list_del_init(&same_queue_rq->queuelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) plug->rq_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) blk_add_rq_to_plug(plug, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) trace_block_plug(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) if (same_queue_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) data.hctx = same_queue_rq->mq_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) trace_block_unplug(q, 1, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) blk_mq_try_issue_directly(data.hctx, same_queue_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) &cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) } else if ((q->nr_hw_queues > 1 && is_sync) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) !data.hctx->dispatch_busy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) * There is no scheduler and we can try to send directly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) * to the hardware.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) blk_mq_try_issue_directly(data.hctx, rq, &cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) /* Default case. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) blk_mq_sched_insert_request(rq, false, true, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) return cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) queue_exit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) blk_queue_exit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) return BLK_QC_T_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) static size_t order_to_size(unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return (size_t)PAGE_SIZE << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) /* called before freeing request pool in @tags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) struct blk_mq_tags *tags, unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) list_for_each_entry(page, &tags->page_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) unsigned long start = (unsigned long)page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) unsigned long end = start + order_to_size(page->private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) for (i = 0; i < set->queue_depth; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) struct request *rq = drv_tags->rqs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) unsigned long rq_addr = (unsigned long)rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) if (rq_addr >= start && rq_addr < end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) cmpxchg(&drv_tags->rqs[i], rq, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) * Wait until all pending iteration is done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) * The request references have been cleared above, and the clearing is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) * guaranteed to be observed once the ->lock is released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) spin_lock_irqsave(&drv_tags->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) spin_unlock_irqrestore(&drv_tags->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) if (tags->rqs && set->ops->exit_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) for (i = 0; i < tags->nr_tags; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) struct request *rq = tags->static_rqs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (!rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) set->ops->exit_request(set, rq, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) tags->static_rqs[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) blk_mq_clear_rq_mapping(set, tags, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) while (!list_empty(&tags->page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) page = list_first_entry(&tags->page_list, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) list_del_init(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * Remove kmemleak object previously allocated in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * blk_mq_alloc_rqs().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) kmemleak_free(page_address(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) __free_pages(page, page->private);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) kfree(tags->rqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) tags->rqs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) kfree(tags->static_rqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) tags->static_rqs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) blk_mq_free_tags(tags, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) unsigned int hctx_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) unsigned int nr_tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) unsigned int reserved_tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) struct blk_mq_tags *tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) if (node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) node = set->numa_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) if (!tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) if (!tags->rqs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) blk_mq_free_tags(tags, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) if (!tags->static_rqs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) kfree(tags->rqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) blk_mq_free_tags(tags, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) return tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
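/*
 * Run the driver's ->init_request() callback, if any, for a newly carved out
 * request and mark the request idle. Drivers typically use the callback to
 * set up the per-request payload (the set->cmd_size bytes that follow
 * struct request), roughly along the lines of this illustrative sketch
 * ("my_cmd" and "my_init_request" are made-up names, not from this file):
 *
 *	static int my_init_request(struct blk_mq_tag_set *set,
 *				   struct request *rq, unsigned int hctx_idx,
 *				   unsigned int numa_node)
 *	{
 *		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);
 *
 *		memset(cmd, 0, sizeof(*cmd));
 *		return 0;
 *	}
 */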
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) unsigned int hctx_idx, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) if (set->ops->init_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) ret = set->ops->init_request(set, rq, hctx_idx, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) WRITE_ONCE(rq->state, MQ_RQ_IDLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)
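/*
 * Allocate the statically mapped requests for one hw queue. Requests are
 * carved out of page chunks of up to 2^max_order pages: start at the largest
 * order that still fits the remaining demand and fall back to smaller orders
 * (down to one that still holds a single request) when the page allocator
 * cannot satisfy the allocation.
 */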
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) unsigned int hctx_idx, unsigned int depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) unsigned int i, j, entries_per_page, max_order = 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) size_t rq_size, left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) if (node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) node = set->numa_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) INIT_LIST_HEAD(&tags->page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) * rq_size is the size of the request plus driver payload, rounded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) * to the cacheline size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) rq_size = round_up(sizeof(struct request) + set->cmd_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) cache_line_size());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) trace_android_vh_blk_alloc_rqs(&rq_size, set, tags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) left = rq_size * depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) for (i = 0; i < depth; ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) int this_order = max_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) int to_do;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) while (this_order && left < order_to_size(this_order - 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) this_order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) page = alloc_pages_node(node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) this_order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) if (!this_order--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) if (order_to_size(this_order) < rq_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) page->private = this_order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) list_add_tail(&page->lru, &tags->page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) p = page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * Allow kmemleak to scan these pages as they contain pointers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) * to additional allocations made via ops->init_request().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) entries_per_page = order_to_size(this_order) / rq_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) to_do = min(entries_per_page, depth - i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) left -= to_do * rq_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) for (j = 0; j < to_do; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) struct request *rq = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) tags->static_rqs[i] = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) if (blk_mq_init_request(set, rq, hctx_idx, node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) tags->static_rqs[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) p += rq_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) blk_mq_free_rqs(set, tags, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) struct rq_iter_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) bool has_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545)
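/*
 * Tag iterator callback: returning true continues the iteration, returning
 * false stops it. Record that at least one request is bound to the hctx we
 * are interested in and stop early.
 */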
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) struct rq_iter_data *iter_data = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) if (rq->mq_hctx != iter_data->hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) iter_data->has_rq = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) struct blk_mq_tags *tags = hctx->sched_tags ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) hctx->sched_tags : hctx->tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) struct rq_iter_data data = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) .hctx = hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) return data.has_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567)
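/*
 * Return true if 'cpu' is the first and only online CPU in hctx->cpumask,
 * i.e. the last online CPU serving this hctx.
 */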
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)
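/*
 * CPU hotplug "offline" callback (CPUHP_AP_BLK_MQ_ONLINE): if the CPU going
 * away is the last online CPU serving this hctx, mark the hctx inactive so
 * no new requests are allocated from it and wait for the in-flight ones to
 * complete.
 */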
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) struct blk_mq_hw_ctx, cpuhp_online);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) !blk_mq_last_cpu_in_hctx(cpu, hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) * Prevent new requests from being allocated on the current hctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) * The smp_mb__after_atomic() pairs with the implied barrier in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * test_and_set_bit_lock() in sbitmap_get() and ensures the inactive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) * flag is seen once we return from the tag allocator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) set_bit(BLK_MQ_S_INACTIVE, &hctx->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) smp_mb__after_atomic();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) * Try to grab a reference to the queue and wait for any outstanding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) * requests. If we could not grab a reference the queue has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) * frozen and there are no requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) while (blk_mq_hctx_has_requests(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) msleep(5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) percpu_ref_put(&hctx->queue->q_usage_counter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610)
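/*
 * CPU hotplug "online" callback: a CPU mapped to this hctx came back online,
 * so clear the inactive flag and let the hctx accept requests again.
 */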
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) struct blk_mq_hw_ctx, cpuhp_online);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) if (cpumask_test_cpu(cpu, hctx->cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) * 'cpu' is going away. Splice any existing rq_list entries from this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) * software queue to the hw queue dispatch list, and ensure that it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) * gets run.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) struct blk_mq_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) LIST_HEAD(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) enum hctx_type type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) if (!cpumask_test_cpu(cpu, hctx->cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) ctx = __blk_mq_get_ctx(hctx->queue, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) type = hctx->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) spin_lock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) if (!list_empty(&ctx->rq_lists[type])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) list_splice_init(&ctx->rq_lists[type], &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) blk_mq_hctx_clear_pending(hctx, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) spin_unlock(&ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) if (list_empty(&tmp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) spin_lock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) list_splice_tail_init(&tmp, &hctx->dispatch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) spin_unlock(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) blk_mq_run_hw_queue(hctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
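/*
 * Unregister the CPU hotplug callbacks for this hctx. BLK_MQ_F_STACKING
 * queues never registered the CPUHP_AP_BLK_MQ_ONLINE instance, so it is only
 * removed for the others; the CPUHP_BLK_MQ_DEAD instance is always removed.
 */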
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) if (!(hctx->flags & BLK_MQ_F_STACKING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) &hctx->cpuhp_online);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) &hctx->cpuhp_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) * Before freeing the hw queue, clear the flush request reference in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) * tags->rqs[] to avoid a potential use-after-free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) unsigned int queue_depth, struct request *flush_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) /* The hw queue may not be mapped yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) if (!tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) for (i = 0; i < queue_depth; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) cmpxchg(&tags->rqs[i], flush_rq, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) * Wait until all pending iterations are done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * The cleared request pointers are guaranteed to be observed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) * after the ->lock is released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) spin_lock_irqsave(&tags->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) spin_unlock_irqrestore(&tags->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) /* hctx->ctxs will be freed in queue's release handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) static void blk_mq_exit_hctx(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) struct request *flush_rq = hctx->fq->flush_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) if (blk_mq_hw_queue_mapped(hctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) blk_mq_tag_idle(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) set->queue_depth, flush_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) if (set->ops->exit_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) set->ops->exit_request(set, flush_rq, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) if (set->ops->exit_hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) set->ops->exit_hctx(hctx, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) blk_mq_remove_cpuhp(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) spin_lock(&q->unused_hctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) list_add(&hctx->hctx_list, &q->unused_hctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) spin_unlock(&q->unused_hctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) static void blk_mq_exit_hw_queues(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) struct blk_mq_tag_set *set, int nr_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) if (i == nr_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) blk_mq_debugfs_unregister_hctx(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) blk_mq_exit_hctx(q, set, hctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)
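/*
 * struct srcu_struct is only needed by BLK_MQ_F_BLOCKING drivers and is laid
 * out at the very end of struct blk_mq_hw_ctx, so only allocate the extra
 * space when the flag is set.
 */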
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) __alignof__(struct blk_mq_hw_ctx)) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) sizeof(struct blk_mq_hw_ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) if (tag_set->flags & BLK_MQ_F_BLOCKING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) hw_ctx_size += sizeof(struct srcu_struct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) return hw_ctx_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)
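/*
 * Wire up a (possibly reused) hctx: register the CPU hotplug callbacks,
 * point it at the tag set's tags for this index, call the driver's
 * ->init_hctx() and initialize the flush request.
 */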
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) static int blk_mq_init_hctx(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) hctx->queue_num = hctx_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) if (!(hctx->flags & BLK_MQ_F_STACKING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) &hctx->cpuhp_online);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) hctx->tags = set->tags[hctx_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) if (set->ops->init_hctx &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) goto unregister_cpu_notifier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) hctx->numa_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) goto exit_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) exit_hctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) if (set->ops->exit_hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) set->ops->exit_hctx(hctx, hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) unregister_cpu_notifier:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) blk_mq_remove_cpuhp(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) static struct blk_mq_hw_ctx *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) if (!hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) goto fail_alloc_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) goto free_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) atomic_set(&hctx->nr_active, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) if (node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) node = set->numa_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) hctx->numa_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) spin_lock_init(&hctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) INIT_LIST_HEAD(&hctx->dispatch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) hctx->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) INIT_LIST_HEAD(&hctx->hctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) * Allocate space for all possible cpus to avoid allocation at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) * runtime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) gfp, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) if (!hctx->ctxs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) goto free_cpumask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) gfp, node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) goto free_ctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) hctx->nr_ctx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) spin_lock_init(&hctx->dispatch_wait_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) if (!hctx->fq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) goto free_bitmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) if (hctx->flags & BLK_MQ_F_BLOCKING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) init_srcu_struct(hctx->srcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) blk_mq_hctx_kobj_init(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) return hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) free_bitmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) sbitmap_free(&hctx->ctx_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) free_ctxs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) kfree(hctx->ctxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) free_cpumask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) free_cpumask_var(hctx->cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) free_hctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) kfree(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) fail_alloc_hctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845)
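/*
 * Initialize the per-CPU software queues and, for multi hw queue setups,
 * steer each hctx towards the NUMA node of the CPUs it serves.
 */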
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) static void blk_mq_init_cpu_queues(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) unsigned int nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) struct blk_mq_tag_set *set = q->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) int k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) __ctx->cpu = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) spin_lock_init(&__ctx->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) INIT_LIST_HEAD(&__ctx->rq_lists[k]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) __ctx->queue = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) * Set local node, IFF we have more than one hw queue. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) * not, we remain on the home node of the device
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) for (j = 0; j < set->nr_maps; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) hctx = blk_mq_map_queue_type(q, j, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) hctx->numa_node = cpu_to_node(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875)
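/*
 * Allocate the tag map plus the statically mapped requests for one hw queue.
 * Returns true on success; on failure everything allocated here is torn down
 * again and false is returned.
 */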
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) unsigned int flags = set->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) set->queue_depth, set->reserved_tags, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) if (!set->tags[hctx_idx])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) set->queue_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) blk_mq_free_rq_map(set->tags[hctx_idx], flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) set->tags[hctx_idx] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) unsigned int hctx_idx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) unsigned int flags = set->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) if (set->tags && set->tags[hctx_idx]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) blk_mq_free_rq_map(set->tags[hctx_idx], flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) set->tags[hctx_idx] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908)
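/*
 * (Re)build the ctx -> hctx mappings from the tag set's queue maps, e.g.
 * after the number of hardware queues or the CPU topology has changed.
 */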
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) static void blk_mq_map_swqueue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) unsigned int i, j, hctx_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) struct blk_mq_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) struct blk_mq_tag_set *set = q->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) cpumask_clear(hctx->cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) hctx->nr_ctx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) hctx->dispatch_from = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) * Map software to hardware queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) * If the cpu isn't present, the cpu is mapped to the first hctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) ctx = per_cpu_ptr(q->queue_ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) for (j = 0; j < set->nr_maps; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) if (!set->map[j].nr_queues) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) ctx->hctxs[j] = blk_mq_map_queue_type(q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) HCTX_TYPE_DEFAULT, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) hctx_idx = set->map[j].mq_map[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) /* an unmapped hw queue can be remapped after the CPU topology changes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) if (!set->tags[hctx_idx] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) !__blk_mq_alloc_map_and_request(set, hctx_idx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) * If tags initialization fails for some hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) * that hctx won't be brought online. In this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * case, remap the current ctx to hctx[0], which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) * is guaranteed to always have tags allocated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) set->map[j].mq_map[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) hctx = blk_mq_map_queue_type(q, j, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) ctx->hctxs[j] = hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) * If the CPU is already set in the mask, then we've
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) * mapped this one already. This can happen if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) * devices share queues across queue maps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) if (cpumask_test_cpu(i, hctx->cpumask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) cpumask_set_cpu(i, hctx->cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) hctx->type = j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) ctx->index_hw[hctx->type] = hctx->nr_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) hctx->ctxs[hctx->nr_ctx++] = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) * If the nr_ctx type overflows, we have exceeded the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) * number of sw queues we can support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) BUG_ON(!hctx->nr_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) for (; j < HCTX_MAX_TYPES; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) ctx->hctxs[j] = blk_mq_map_queue_type(q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) HCTX_TYPE_DEFAULT, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) * If no software queues are mapped to this hardware queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) * disable it and free the request entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) if (!hctx->nr_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) /* Never unmap queue 0. We need it as a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) * fallback in case a new remap fails to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) * allocate tags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) if (i && set->tags[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) blk_mq_free_map_and_requests(set, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) hctx->tags = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) hctx->tags = set->tags[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) WARN_ON(!hctx->tags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) * Set the map size to the number of mapped software queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) * This is more accurate and more efficient than looping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) * over all possibly mapped software queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) * Initialize batch roundrobin counts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) * Caller needs to ensure that we're either frozen/quiesced, or that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) * the queue isn't live yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) static void queue_set_hctx_shared(struct request_queue *q, bool shared)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) if (shared)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) bool shared)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) lockdep_assert_held(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) list_for_each_entry(q, &set->tag_list, tag_set_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) queue_set_hctx_shared(q, shared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) static void blk_mq_del_queue_tag_set(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) struct blk_mq_tag_set *set = q->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) mutex_lock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) list_del(&q->tag_set_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) if (list_is_singular(&set->tag_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) /* just transitioned to unshared */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) /* update existing queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) blk_mq_update_tag_set_shared(set, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) mutex_unlock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) INIT_LIST_HEAD(&q->tag_set_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) mutex_lock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) * Check to see if we're transitioning to shared (from 1 to 2 queues).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) if (!list_empty(&set->tag_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) /* update existing queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) blk_mq_update_tag_set_shared(set, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) queue_set_hctx_shared(q, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) list_add_tail(&q->tag_set_list, &set->tag_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) mutex_unlock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) /* All allocations will be freed in release handler of q->mq_kobj */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) static int blk_mq_alloc_ctxs(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) struct blk_mq_ctxs *ctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) if (!ctxs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) if (!ctxs->queue_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) ctx->ctxs = ctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) q->mq_kobj = &ctxs->kobj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) q->queue_ctx = ctxs->queue_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) kfree(ctxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) * This is the actual release handler for mq, but we do it from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) * request queue's release handler to avoid use-after-free and other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) * headaches. Ideally q->mq_kobj wouldn't have been introduced, but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) * can't group the ctx/kctx kobjects without it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) void blk_mq_release(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) struct blk_mq_hw_ctx *hctx, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) queue_for_each_hw_ctx(q, hctx, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) /* all hctx are in .unused_hctx_list now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) list_del_init(&hctx->hctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) kobject_put(&hctx->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) kfree(q->queue_hw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) * Release .mq_kobj and the sw queues' kobjects now because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) * both share their lifetime with the request queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) blk_mq_sysfs_deinit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) void *queuedata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) struct request_queue *uninit_q, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) uninit_q = blk_alloc_queue(set->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) if (!uninit_q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) uninit_q->queuedata = queuedata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) * Initialize the queue without an elevator. device_add_disk() will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) * take care of the elevator initialization later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) q = blk_mq_init_allocated_queue(set, uninit_q, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) if (IS_ERR(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) blk_cleanup_queue(uninit_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) return q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) return blk_mq_init_queue_data(set, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) EXPORT_SYMBOL(blk_mq_init_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) * Helper for setting up a queue with mq ops, a given queue depth, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) * the passed-in tag set flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) */
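/*
 * Illustrative sketch (not taken from this file): a simple single-queue
 * driver would typically keep a struct blk_mq_tag_set in its device data
 * and do something like
 *
 *	q = blk_mq_init_sq_queue(&dev->tag_set, &my_mq_ops, 64,
 *				 BLK_MQ_F_SHOULD_MERGE);
 *	if (IS_ERR(q))
 *		return PTR_ERR(q);
 *
 * where "dev", "my_mq_ops" and the depth of 64 are made-up placeholders.
 */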
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) const struct blk_mq_ops *ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) unsigned int queue_depth,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) unsigned int set_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) memset(set, 0, sizeof(*set));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) set->ops = ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) set->nr_hw_queues = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) set->nr_maps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) set->queue_depth = queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) set->numa_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) set->flags = set_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) ret = blk_mq_alloc_tag_set(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) q = blk_mq_init_queue(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) if (IS_ERR(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) blk_mq_free_tag_set(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) return q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) return q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) EXPORT_SYMBOL(blk_mq_init_sq_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) struct blk_mq_tag_set *set, struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) int hctx_idx, int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) struct blk_mq_hw_ctx *hctx = NULL, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) /* reuse dead hctx first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) spin_lock(&q->unused_hctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) if (tmp->numa_node == node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) hctx = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) if (hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) list_del_init(&hctx->hctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) spin_unlock(&q->unused_hctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) if (!hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) hctx = blk_mq_alloc_hctx(q, set, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) if (!hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) goto free_hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) return hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) free_hctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) kobject_put(&hctx->kobj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) int i, j, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) if (q->nr_hw_queues < set->nr_hw_queues) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) struct blk_mq_hw_ctx **new_hctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) new_hctxs = kcalloc_node(set->nr_hw_queues,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) sizeof(*new_hctxs), GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) set->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) if (!new_hctxs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) if (hctxs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) memcpy(new_hctxs, hctxs, q->nr_hw_queues *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) sizeof(*hctxs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) q->queue_hw_ctx = new_hctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) kfree(hctxs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) hctxs = new_hctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) /* protect against switching io scheduler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) mutex_lock(&q->sysfs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) for (i = 0; i < set->nr_hw_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
		/*
		 * If the hw queue has been mapped to another NUMA node,
		 * we need to realloc the hctx. If the allocation fails,
		 * fall back to the previous one.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) if (hctxs[i] && (hctxs[i]->numa_node == node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) if (hctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) if (hctxs[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) blk_mq_exit_hctx(q, set, hctxs[i], i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) hctxs[i] = hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) if (hctxs[i])
				pr_warn("Allocate new hctx on node %d fails, fallback to previous one on node %d\n",
					node, hctxs[i]->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) }
	/*
	 * Increasing nr_hw_queues failed: free the newly allocated hctxs
	 * in [q->nr_hw_queues, i) and keep the previous q->nr_hw_queues.
	 * Otherwise adopt set->nr_hw_queues and release any hctxs beyond
	 * the new count (the shrink case).
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) if (i != set->nr_hw_queues) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) j = q->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) end = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) j = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) end = q->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) q->nr_hw_queues = set->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) for (; j < end; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) struct blk_mq_hw_ctx *hctx = hctxs[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) if (hctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) if (hctx->tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) blk_mq_free_map_and_requests(set, j);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) blk_mq_exit_hctx(q, set, hctx, j);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) hctxs[j] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) mutex_unlock(&q->sysfs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) bool elevator_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) /* mark the queue as mq asap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) q->mq_ops = set->ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) blk_mq_poll_stats_bkt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) BLK_MQ_POLL_STATS_BKTS, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) if (!q->poll_cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) goto err_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) if (blk_mq_alloc_ctxs(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) goto err_poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) /* init q->mq_kobj and sw queues' kobjects */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) blk_mq_sysfs_init(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) INIT_LIST_HEAD(&q->unused_hctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) spin_lock_init(&q->unused_hctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) blk_mq_realloc_hw_ctxs(set, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) if (!q->nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) goto err_hctxs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) q->tag_set = set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) if (set->nr_maps > HCTX_TYPE_POLL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) set->map[HCTX_TYPE_POLL].nr_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) blk_queue_flag_set(QUEUE_FLAG_POLL, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) q->sg_reserved_size = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) INIT_LIST_HEAD(&q->requeue_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) spin_lock_init(&q->requeue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) q->nr_requests = set->queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) * Default to classic polling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) q->poll_nsec = BLK_MQ_POLL_CLASSIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) blk_mq_init_cpu_queues(q, set->nr_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) blk_mq_add_queue_tag_set(set, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) blk_mq_map_swqueue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) if (elevator_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) elevator_init_mq(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) return q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) err_hctxs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) kfree(q->queue_hw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) q->nr_hw_queues = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) blk_mq_sysfs_deinit(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) err_poll:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) blk_stat_free_callback(q->poll_cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) q->poll_cb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) err_exit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) q->mq_ops = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) EXPORT_SYMBOL(blk_mq_init_allocated_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) /* tags can _not_ be used after returning from blk_mq_exit_queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) void blk_mq_exit_queue(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) struct blk_mq_tag_set *set = q->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) blk_mq_del_queue_tag_set(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) for (i = 0; i < set->nr_hw_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) if (!__blk_mq_alloc_map_and_request(set, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) goto out_unwind;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) out_unwind:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) while (--i >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) blk_mq_free_map_and_requests(set, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) * Allocate the request maps associated with this tag_set. Note that this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) * may reduce the depth asked for, if memory is tight. set->queue_depth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) * will be updated to reflect the allocated depth.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) static int blk_mq_alloc_map_and_requests(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) unsigned int depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) depth = set->queue_depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) err = __blk_mq_alloc_rq_maps(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) set->queue_depth >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) } while (set->queue_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) if (!set->queue_depth || err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) pr_err("blk-mq: failed to allocate request map\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) if (depth != set->queue_depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) depth, set->queue_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) }
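
/*
 * Worked example of the fallback above (illustrative numbers only): with
 * set->queue_depth = 1024 and reserved_tags = 1, a failing allocation is
 * retried at depths 512, 256, 128, ... until it either succeeds or the
 * depth would drop below set->reserved_tags + BLK_MQ_TAG_MIN. If the
 * allocation succeeds at, say, 256, set->queue_depth is left at 256 and
 * the "reduced tag depth" message reports 1024 -> 256.
 */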
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) * blk_mq_map_queues() and multiple .map_queues() implementations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) * number of hardware queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) if (set->nr_maps == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) if (set->ops->map_queues && !is_kdump_kernel()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) * transport .map_queues is usually done in the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) * way:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) * for (queue = 0; queue < set->nr_hw_queues; queue++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) * mask = get_cpu_mask(queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) * for_each_cpu(cpu, mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) * set->map[x].mq_map[cpu] = queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) * }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) *
		 * When we need to remap, the table has to be cleared to kill
		 * stale mappings, since a CPU may end up not being mapped to
		 * any hw queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) for (i = 0; i < set->nr_maps; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) blk_mq_clear_mq_map(&set->map[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) return set->ops->map_queues(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) BUG_ON(set->nr_maps > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) int cur_nr_hw_queues, int new_nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) struct blk_mq_tags **new_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) if (cur_nr_hw_queues >= new_nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) GFP_KERNEL, set->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) if (!new_tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) if (set->tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) memcpy(new_tags, set->tags, cur_nr_hw_queues *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) sizeof(*set->tags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) kfree(set->tags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) set->tags = new_tags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) set->nr_hw_queues = new_nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504)
/*
 * Alloc a tag set to be associated with one or more request queues.
 * May fail with -EINVAL if the parameters are inconsistent, or -ENOMEM
 * if an allocation fails. May adjust the requested depth down, if it's
 * too large. In that case, the value actually used will be stored in
 * set->queue_depth.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) if (!set->nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) if (!set->queue_depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) if (!set->ops->queue_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) if (!set->ops->get_budget ^ !set->ops->put_budget)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) pr_info("blk-mq: reduced tag depth to %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) BLK_MQ_MAX_DEPTH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) set->queue_depth = BLK_MQ_MAX_DEPTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) if (!set->nr_maps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) set->nr_maps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) else if (set->nr_maps > HCTX_MAX_TYPES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) * If a crashdump is active, then we are potentially in a very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) * memory constrained environment. Limit us to 1 queue and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) * 64 tags to prevent using too much memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) if (is_kdump_kernel()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) set->nr_hw_queues = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) set->nr_maps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) set->queue_depth = min(64U, set->queue_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) * There is no use for more h/w queues than cpus if we just have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) * a single map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) set->nr_hw_queues = nr_cpu_ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) for (i = 0; i < set->nr_maps; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) sizeof(set->map[i].mq_map[0]),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) GFP_KERNEL, set->numa_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) if (!set->map[i].mq_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) goto out_free_mq_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) ret = blk_mq_update_queue_map(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) goto out_free_mq_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) ret = blk_mq_alloc_map_and_requests(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) goto out_free_mq_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) if (blk_mq_is_sbitmap_shared(set->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) atomic_set(&set->active_queues_shared_sbitmap, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) if (blk_mq_init_shared_sbitmap(set, set->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) goto out_free_mq_rq_maps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) mutex_init(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) INIT_LIST_HEAD(&set->tag_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) out_free_mq_rq_maps:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) for (i = 0; i < set->nr_hw_queues; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) blk_mq_free_map_and_requests(set, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) out_free_mq_map:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) for (i = 0; i < set->nr_maps; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) kfree(set->map[i].mq_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) set->map[i].mq_map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) kfree(set->tags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) set->tags = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) EXPORT_SYMBOL(blk_mq_alloc_tag_set);
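
/*
 * Illustrative sketch of the usual calling convention (hypothetical
 * driver fields, not code from this file): drivers fill in the tag set
 * by hand and pair blk_mq_alloc_tag_set() with blk_mq_init_queue():
 *
 *	set->ops = &my_mq_ops;
 *	set->nr_hw_queues = nr_channels;
 *	set->queue_depth = 128;
 *	set->numa_node = NUMA_NO_NODE;
 *	set->flags = BLK_MQ_F_SHOULD_MERGE;
 *
 *	ret = blk_mq_alloc_tag_set(set);
 *	if (ret)
 *		return ret;
 *
 *	q = blk_mq_init_queue(set);
 *	if (IS_ERR(q)) {
 *		blk_mq_free_tag_set(set);
 *		return PTR_ERR(q);
 *	}
 *
 * blk_mq_init_sq_queue() above is a convenience wrapper for exactly this
 * sequence with nr_hw_queues == 1.
 */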
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) for (i = 0; i < set->nr_hw_queues; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) blk_mq_free_map_and_requests(set, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) if (blk_mq_is_sbitmap_shared(set->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) blk_mq_exit_shared_sbitmap(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) for (j = 0; j < set->nr_maps; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) kfree(set->map[j].mq_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) set->map[j].mq_map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) kfree(set->tags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) set->tags = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) EXPORT_SYMBOL(blk_mq_free_tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) struct blk_mq_tag_set *set = q->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) if (!set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) if (q->nr_requests == nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) blk_mq_quiesce_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) queue_for_each_hw_ctx(q, hctx, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) if (!hctx->tags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) continue;
		/*
		 * If there is no MQ scheduler, resize the hardware queue's
		 * own tags. If we're using an MQ scheduler, just update the
		 * scheduler queue depth instead. This is similar to what the
		 * old code would do.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) if (!hctx->sched_tags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) if (!ret && blk_mq_is_sbitmap_shared(set->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) blk_mq_tag_resize_shared_sbitmap(set, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) nr, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) if (q->elevator && q->elevator->type->ops.depth_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) q->elevator->type->ops.depth_updated(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) q->nr_requests = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) blk_mq_unquiesce_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) }
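
/*
 * Note: this is normally reached from the writable "nr_requests" queue
 * sysfs attribute (/sys/block/<dev>/queue/nr_requests). The queue is
 * frozen and quiesced above, so resizing the tag maps cannot race with
 * new I/O submissions or with the dispatch path.
 */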
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) * request_queue and elevator_type pair.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) * It is just used by __blk_mq_update_nr_hw_queues to cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) * the elevator_type associated with a request_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) struct blk_mq_qe_pair {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) struct list_head node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) struct elevator_type *type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) * Cache the elevator_type in qe pair list and switch the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) * io scheduler to 'none'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) static bool blk_mq_elv_switch_none(struct list_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) struct blk_mq_qe_pair *qe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) if (!q->elevator)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) if (!qe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) INIT_LIST_HEAD(&qe->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) qe->q = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) qe->type = q->elevator->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) list_add(&qe->node, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) mutex_lock(&q->sysfs_lock);
	/*
	 * After elevator_switch_mq, the previous elevator_queue will be
	 * released by elevator_release. The reference to the io scheduler
	 * module taken by elevator_get will also be put. So take an extra
	 * reference to the io scheduler module here to prevent it from
	 * being removed while we still need it for switching back.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) __module_get(qe->type->elevator_owner);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) elevator_switch_mq(q, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) mutex_unlock(&q->sysfs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) static void blk_mq_elv_switch_back(struct list_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) struct blk_mq_qe_pair *qe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) struct elevator_type *t = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) list_for_each_entry(qe, head, node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) if (qe->q == q) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) t = qe->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) if (!t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) list_del(&qe->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) kfree(qe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) mutex_lock(&q->sysfs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) elevator_switch_mq(q, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) mutex_unlock(&q->sysfs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) int nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) struct request_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) LIST_HEAD(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) int prev_nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) lockdep_assert_held(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) nr_hw_queues = nr_cpu_ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) if (nr_hw_queues < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) list_for_each_entry(q, &set->tag_list, tag_set_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) blk_mq_freeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) * Switch IO scheduler to 'none', cleaning up the data associated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) * with the previous scheduler. We will switch back once we are done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) * updating the new sw to hw queue mappings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) list_for_each_entry(q, &set->tag_list, tag_set_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) if (!blk_mq_elv_switch_none(&head, q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) goto switch_back;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) list_for_each_entry(q, &set->tag_list, tag_set_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) blk_mq_debugfs_unregister_hctxs(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) blk_mq_sysfs_unregister(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) prev_nr_hw_queues = set->nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) goto reregister;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) set->nr_hw_queues = nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) fallback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) blk_mq_update_queue_map(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) list_for_each_entry(q, &set->tag_list, tag_set_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) blk_mq_realloc_hw_ctxs(set, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) if (q->nr_hw_queues != set->nr_hw_queues) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) nr_hw_queues, prev_nr_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) set->nr_hw_queues = prev_nr_hw_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) blk_mq_map_swqueue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) reregister:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) list_for_each_entry(q, &set->tag_list, tag_set_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) blk_mq_sysfs_register(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) blk_mq_debugfs_register_hctxs(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) switch_back:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) list_for_each_entry(q, &set->tag_list, tag_set_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) blk_mq_elv_switch_back(&head, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) list_for_each_entry(q, &set->tag_list, tag_set_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) blk_mq_unfreeze_queue(q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) mutex_lock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) __blk_mq_update_nr_hw_queues(set, nr_hw_queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) mutex_unlock(&set->tag_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
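
/*
 * Illustrative sketch (hypothetical driver code): transports that discover
 * a different number of hardware channels at runtime, e.g. after a
 * controller reset, re-map their queues with something along the lines of:
 *
 *	new_nr = min_t(unsigned int, nr_channels, num_online_cpus());
 *	blk_mq_update_nr_hw_queues(&my_tag_set, new_nr);
 *
 * All request queues sharing the tag set are frozen for the duration of
 * the update, so in-flight requests are drained before the new mapping
 * takes effect.
 */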
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) /* Enable polling stats and return whether they were already enabled. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) static bool blk_poll_stats_enable(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) blk_stat_add_callback(q, q->poll_cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) static void blk_mq_poll_stats_start(struct request_queue *q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) * We don't arm the callback if polling stats are not enabled or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) * callback is already active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) blk_stat_is_active(q->poll_cb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) blk_stat_activate_msecs(q->poll_cb, 100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) struct request_queue *q = cb->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) if (cb->stat[bucket].nr_samples)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) q->poll_stat[bucket] = cb->stat[bucket];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) unsigned long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) int bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) * If stats collection isn't on, don't sleep but turn it on for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) * future users
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) if (!blk_poll_stats_enable(q))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) * As an optimistic guess, use half of the mean service time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) * for this type of request. We can (and should) make this smarter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) * For instance, if the completion latencies are tight, we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) * get closer than just half the mean. This is especially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) * important on devices where the completion latencies are longer
	 * than ~10 usec. We do use the stats for the relevant IO size,
	 * if available, which does lead to better estimates.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) bucket = blk_mq_poll_stats_bkt(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) if (bucket < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) if (q->poll_stat[bucket].nr_samples)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) ret = (q->poll_stat[bucket].mean + 1) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) }
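
/*
 * Example of the estimate above (illustrative numbers): if the tracked
 * mean completion time for this request's stats bucket is 20000 ns, the
 * hybrid poller will sleep for (20000 + 1) / 2 = 10000 ns before it
 * starts busy polling for the completion.
 */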
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) struct hrtimer_sleeper hs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) enum hrtimer_mode mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) unsigned int nsecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) ktime_t kt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) if (rq->rq_flags & RQF_MQ_POLL_SLEPT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) * 0: use half of prev avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) * >0: use this specific value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) if (q->poll_nsec > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) nsecs = q->poll_nsec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) nsecs = blk_mq_poll_nsecs(q, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) if (!nsecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) rq->rq_flags |= RQF_MQ_POLL_SLEPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) * This will be replaced with the stats tracking code, using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) * 'avg_completion_time / 2' as the pre-sleep target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) kt = nsecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) mode = HRTIMER_MODE_REL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) hrtimer_set_expires(&hs.timer, kt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) set_current_state(TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) hrtimer_sleeper_start_expires(&hs, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) if (hs.task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) io_schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) hrtimer_cancel(&hs.timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) mode = HRTIMER_MODE_ABS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) } while (hs.task && !signal_pending(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) __set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) destroy_hrtimer_on_stack(&hs.timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) }
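
/*
 * The q->poll_nsec policy consumed above is normally configured via the
 * "io_poll_delay" queue sysfs attribute (assumption about the usual
 * configuration path): BLK_MQ_POLL_CLASSIC selects classic busy polling,
 * 0 selects the adaptive half-of-mean sleep from blk_mq_poll_nsecs(), and
 * a positive q->poll_nsec requests a fixed pre-poll sleep of that many
 * nanoseconds.
 */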
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) static bool blk_mq_poll_hybrid(struct request_queue *q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) if (!blk_qc_t_is_internal(cookie))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) * With scheduling, if the request has completed, we'll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) * get a NULL return here, as we clear the sched tag when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) * that happens. The request still remains valid, like always,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) * so we should be safe with just the NULL check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) if (!rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) return blk_mq_poll_hybrid_sleep(q, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) * blk_poll - poll for IO completions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) * @q: the queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) * @cookie: cookie passed back at IO submission time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) * @spin: whether to spin for completions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) * Poll for completions on the passed in queue. Returns number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) * completed entries found. If @spin is true, then blk_poll will continue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) * looping until at least one completion is found, unless the task is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) * otherwise marked running (or we need to reschedule).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) struct blk_mq_hw_ctx *hctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) long state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) if (!blk_qc_t_valid(cookie) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) if (current->plug)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) blk_flush_plug_list(current->plug, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) * If we sleep, have the caller restart the poll loop to reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) * the state. Like for the other success return cases, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) * caller is responsible for checking if the IO completed. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) * the IO isn't complete, we'll get called again and will go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) * straight to the busy poll loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) if (blk_mq_poll_hybrid(q, hctx, cookie))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) hctx->poll_considered++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) state = current->state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) hctx->poll_invoked++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) ret = q->mq_ops->poll(hctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) hctx->poll_success++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) __set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) if (signal_pending_state(state, current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) __set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) if (current->state == TASK_RUNNING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) if (ret < 0 || !spin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) cpu_relax();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) } while (!need_resched());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) __set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) EXPORT_SYMBOL_GPL(blk_poll);
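
/*
 * Illustrative polling loop (hypothetical caller, simplified from how the
 * direct I/O paths drive this interface): the submitter keeps the cookie
 * returned at submission time and polls until its own completion flag is
 * set:
 *
 *	while (!READ_ONCE(done)) {
 *		if (blk_poll(q, cookie, true) <= 0)
 *			io_schedule();
 *	}
 *
 * A return of 0 with @spin set means polling gave up (polling is not
 * enabled on the queue, ->poll returned an error, or need_resched() was
 * set), so the caller must be prepared to sleep or retry.
 */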
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) unsigned int blk_mq_rq_cpu(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) return rq->mq_ctx->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) EXPORT_SYMBOL(blk_mq_rq_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) static int __init blk_mq_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) for_each_possible_cpu(i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) "block/softirq:dead", NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) blk_softirq_cpu_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) blk_mq_hctx_notify_dead);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) blk_mq_hctx_notify_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) blk_mq_hctx_notify_offline);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) subsys_initcall(blk_mq_init);