// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2018 Christoph Hellwig.
 */

#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"

static bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

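/*
 * Queue freeze helpers for the multipath node: walk all namespace heads in
 * the subsystem and release, wait for, or start a freeze on each head's
 * request queue.  Callers must hold subsys->lock.
 */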
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_freeze_queue_start(h->disk->queue);
}

/*
 * If multipathing is enabled we need to always use the subsystem instance
 * number for numbering our devices to avoid conflicts between subsystems that
 * have multiple controllers and thus use the multipath-aware subsystem node
 * and those that have a single controller and use the controller node
 * directly.
 */
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags)
{
	if (!multipath) {
		sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
	} else if (ns->head->disk) {
		sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
			ctrl->instance, ns->head->instance);
		*flags = GENHD_FL_HIDDEN;
	} else {
		sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
			ns->head->instance);
	}
}

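/*
 * Called when a request on a multipath-enabled namespace fails with a
 * path-related error: clear the cached current path, move the bios to the
 * ns_head requeue list and complete the failed request so that a new path
 * can be picked on resubmission.
 */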
void nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	u16 status = nvme_req(req)->status & 0x7ff;
	unsigned long flags;

	nvme_mpath_clear_current_path(ns);

	/*
	 * If we got back an ANA error, we know the controller is alive but not
	 * ready to serve this namespace.  Kick off a re-read of the ANA
	 * information page, and just try any other available path for now.
	 */
	if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
		set_bit(NVME_NS_ANA_PENDING, &ns->flags);
		queue_work(nvme_wq, &ns->ctrl->ana_work);
	}

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);

	blk_mq_end_request(req, 0);
	kblockd_schedule_work(&ns->head->requeue_work);
}

void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->disk)
			kblockd_schedule_work(&ns->head->requeue_work);
	}
	up_read(&ctrl->namespaces_rwsem);
}

static const char *nvme_ana_state_names[] = {
	[0]				= "invalid state",
	[NVME_ANA_OPTIMIZED]		= "optimized",
	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
	[NVME_ANA_CHANGE]		= "change",
};

bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	bool changed = false;
	int node;

	if (!head)
		goto out;

	for_each_node(node) {
		if (ns == rcu_access_pointer(head->current_path[node])) {
			rcu_assign_pointer(head->current_path[node], NULL);
			changed = true;
		}
	}
out:
	return changed;
}

void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		nvme_mpath_clear_current_path(ns);
		kblockd_schedule_work(&ns->head->requeue_work);
	}
	up_read(&ctrl->namespaces_rwsem);
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
	/*
	 * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
	 * still be able to complete assuming that the controller is connected.
	 * Otherwise it will fail immediately and return to the requeue list.
	 */
	if (ns->ctrl->state != NVME_CTRL_LIVE &&
	    ns->ctrl->state != NVME_CTRL_DELETING)
		return true;
	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
	    test_bit(NVME_NS_REMOVING, &ns->flags))
		return true;
	return false;
}

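/*
 * Select a new current path for @node: prefer the ANA-optimized path with
 * the smallest NUMA distance, fall back to a non-optimized one, and cache
 * the result in head->current_path[node].
 */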
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
	int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
	struct nvme_ns *found = NULL, *fallback = NULL, *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (nvme_path_is_disabled(ns))
			continue;

		if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
			distance = node_distance(node, ns->ctrl->numa_node);
		else
			distance = LOCAL_DISTANCE;

		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
			if (distance < found_distance) {
				found_distance = distance;
				found = ns;
			}
			break;
		case NVME_ANA_NONOPTIMIZED:
			if (distance < fallback_distance) {
				fallback_distance = distance;
				fallback = ns;
			}
			break;
		default:
			break;
		}
	}

	if (!found)
		found = fallback;
	if (found)
		rcu_assign_pointer(head->current_path[node], found);
	return found;
}

static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
		struct nvme_ns *ns)
{
	ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns,
			siblings);
	if (ns)
		return ns;
	return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
}

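/*
 * Round-robin path selection: start from the path after @old and take the
 * first usable ANA-optimized path, falling back to a non-optimized one or
 * to @old itself if nothing better is found.
 */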
static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
		int node, struct nvme_ns *old)
{
	struct nvme_ns *ns, *found = NULL;

	if (list_is_singular(&head->list)) {
		if (nvme_path_is_disabled(old))
			return NULL;
		return old;
	}

	for (ns = nvme_next_ns(head, old);
	     ns && ns != old;
	     ns = nvme_next_ns(head, ns)) {
		if (nvme_path_is_disabled(ns))
			continue;

		if (ns->ana_state == NVME_ANA_OPTIMIZED) {
			found = ns;
			goto out;
		}
		if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
			found = ns;
	}

	/*
	 * The loop above skips the current path for round-robin semantics.
	 * Fall back to the current path if either:
	 *  - no other optimized path found and current is optimized,
	 *  - no other usable path found and current is usable.
	 */
	if (!nvme_path_is_disabled(old) &&
	    (old->ana_state == NVME_ANA_OPTIMIZED ||
	     (!found && old->ana_state == NVME_ANA_NONOPTIMIZED)))
		return old;

	if (!found)
		return NULL;
out:
	rcu_assign_pointer(head->current_path[node], found);
	return found;
}

static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
	return ns->ctrl->state == NVME_CTRL_LIVE &&
		ns->ana_state == NVME_ANA_OPTIMIZED;
}

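/*
 * Return the path to use for the current submission: use the cached
 * per-node path while it is still optimized, hand off to round-robin
 * selection if that iopolicy is set, and otherwise redo the full path
 * search.
 */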
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	int node = numa_node_id();
	struct nvme_ns *ns;

	ns = srcu_dereference(head->current_path[node], &head->srcu);
	if (unlikely(!ns))
		return __nvme_find_path(head, node);

	if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR)
		return nvme_round_robin_path(head, node, ns);
	if (unlikely(!nvme_path_is_optimized(ns)))
		return __nvme_find_path(head, node);
	return ns;
}

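/*
 * Check whether any path may become usable: a controller that is live,
 * resetting or connecting is expected to serve I/O again, so requeueing
 * is preferable to failing the bio.
 */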
static bool nvme_available_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		switch (ns->ctrl->state) {
		case NVME_CTRL_LIVE:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_CONNECTING:
			/* fallthru */
			return true;
		default:
			break;
		}
	}
	return false;
}

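/*
 * submit_bio entry point for the multipath node: pick a path under SRCU
 * protection and resubmit the bio to that path's disk, requeue it if no
 * path is usable right now but one may come back, or fail it otherwise.
 */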
blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
{
	struct nvme_ns_head *head = bio->bi_disk->private_data;
	struct device *dev = disk_to_dev(head->disk);
	struct nvme_ns *ns;
	blk_qc_t ret = BLK_QC_T_NONE;
	int srcu_idx;

	/*
	 * The namespace might be going away and the bio might be moved to a
	 * different queue via blk_steal_bios(), so we need to use the bio_split
	 * pool from the original queue to allocate the bvecs from.
	 */
	blk_queue_split(&bio);

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (likely(ns)) {
		bio->bi_disk = ns->disk;
		bio->bi_opf |= REQ_NVME_MPATH;
		trace_block_bio_remap(bio->bi_disk->queue, bio,
				      disk_devt(ns->head->disk),
				      bio->bi_iter.bi_sector);
		ret = submit_bio_noacct(bio);
	} else if (nvme_available_path(head)) {
		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no available path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
	}

	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

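/*
 * Resubmit all bios parked on the requeue list through the multipath node
 * so that path selection runs again.
 */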
static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, requeue_work);
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		/*
		 * Reset disk to the mpath node and resubmit to select a new
		 * path.
		 */
		bio->bi_disk = head->disk;
		submit_bio_noacct(bio);
	}
}

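/*
 * Allocate the shared gendisk and request queue for a namespace head when
 * multipathing is enabled and the subsystem reports multi-controller
 * support.  The disk is only registered later by nvme_mpath_set_live().
 */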
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	struct request_queue *q;
	bool vwc = false;

	mutex_init(&head->lock);
	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);

	/*
	 * Add a multipath node if the subsystem supports multiple controllers.
	 * We also do this for private namespaces as the namespace sharing data
	 * could change after a rescan.
	 */
	if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
		return 0;

	q = blk_alloc_queue(ctrl->numa_node);
	if (!q)
		goto out;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(q, 512);
	blk_set_stacking_limits(&q->limits);

	/* we need to propagate up the VWC settings */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(q, vwc, vwc);

	head->disk = alloc_disk(0);
	if (!head->disk)
		goto out_cleanup_queue;
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;
	head->disk->queue = q;
	head->disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;

out_cleanup_queue:
	blk_cleanup_queue(q);
out:
	return -ENOMEM;
}

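/*
 * Register the multipath disk on the first live path and pre-populate the
 * per-node current path cache if this path is optimized, then kick the
 * requeue work so parked bios can make progress.
 */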
static void nvme_mpath_set_live(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	if (!head->disk)
		return;

	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
		device_add_disk(&head->subsys->dev, head->disk,
				nvme_ns_id_attr_groups);

	mutex_lock(&head->lock);
	if (nvme_path_is_optimized(ns)) {
		int node, srcu_idx;

		srcu_idx = srcu_read_lock(&head->srcu);
		for_each_node(node)
			__nvme_find_path(head, node);
		srcu_read_unlock(&head->srcu, srcu_idx);
	}
	mutex_unlock(&head->lock);

	synchronize_srcu(&head->srcu);
	kblockd_schedule_work(&head->requeue_work);
}

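/*
 * Walk the ANA log page in ctrl->ana_log_buf, validate each group
 * descriptor and invoke @cb on it.  Must be called with ctrl->ana_lock
 * held.
 */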
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
			  void *))
{
	void *base = ctrl->ana_log_buf;
	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
	int error, i;

	lockdep_assert_held(&ctrl->ana_lock);

	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
		struct nvme_ana_group_desc *desc = base + offset;
		u32 nr_nsids;
		size_t nsid_buf_size;

		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
			return -EINVAL;

		nr_nsids = le32_to_cpu(desc->nnsids);
		nsid_buf_size = nr_nsids * sizeof(__le32);

		if (WARN_ON_ONCE(desc->grpid == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
			return -EINVAL;

		offset += sizeof(*desc);
		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
			return -EINVAL;

		error = cb(ctrl, desc, data);
		if (error)
			return error;

		offset += nsid_buf_size;
	}

	return 0;
}

static inline bool nvme_state_is_live(enum nvme_ana_state state)
{
	return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
}

static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
		struct nvme_ns *ns)
{
	ns->ana_grpid = le32_to_cpu(desc->grpid);
	ns->ana_state = desc->state;
	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);

	if (nvme_state_is_live(ns->ana_state))
		nvme_mpath_set_live(ns);
}

static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
	unsigned *nr_change_groups = data;
	struct nvme_ns *ns;

	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
			le32_to_cpu(desc->grpid),
			nvme_ana_state_names[desc->state]);

	if (desc->state == NVME_ANA_CHANGE)
		(*nr_change_groups)++;

	if (!nr_nsids)
		return 0;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		unsigned nsid;
again:
		nsid = le32_to_cpu(desc->nsids[n]);
		if (ns->head->ns_id < nsid)
			continue;
		if (ns->head->ns_id == nsid)
			nvme_update_ns_ana_state(desc, ns);
		if (++n == nr_nsids)
			break;
		if (ns->head->ns_id > nsid)
			goto again;
	}
	up_read(&ctrl->namespaces_rwsem);
	return 0;
}

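/*
 * Fetch the ANA log page from the controller and update the ANA state of
 * all namespaces, arming the ANATT timer while any group is still in the
 * change state.
 */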
static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
{
	u32 nr_change_groups = 0;
	int error;

	mutex_lock(&ctrl->ana_lock);
	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, NVME_CSI_NVM,
			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
	if (error) {
		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
		goto out_unlock;
	}

	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
				   nvme_update_ana_state);
	if (error)
		goto out_unlock;

	/*
	 * In theory we should have an ANATT timer per group as they might enter
	 * the change state at different times.  But that is a lot of overhead
	 * just to protect against a target that keeps entering new change
	 * states while never finishing previous ones.  We'll still eventually
	 * time out once all groups are in change state, so this isn't a big
	 * deal.
	 *
	 * We also double the ANATT value to provide some slack for transports
	 * or AEN processing overhead.
	 */
	if (nr_change_groups)
		mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
	else
		del_timer_sync(&ctrl->anatt_timer);
out_unlock:
	mutex_unlock(&ctrl->ana_lock);
	return error;
}

static void nvme_ana_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);

	if (ctrl->state != NVME_CTRL_LIVE)
		return;

	nvme_read_ana_log(ctrl);
}

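/*
 * The controller exceeded its ANA transition time without leaving the
 * change state, so reset it to recover.
 */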
static void nvme_anatt_timeout(struct timer_list *t)
{
	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);

	dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
	nvme_reset_ctrl(ctrl);
}

void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
	if (!nvme_ctrl_use_ana(ctrl))
		return;
	del_timer_sync(&ctrl->anatt_timer);
	cancel_work_sync(&ctrl->ana_work);
}

#define SUBSYS_ATTR_RW(_name, _mode, _show, _store)  \
	struct device_attribute subsys_attr_##_name =	\
		__ATTR(_name, _mode, _show, _store)

static const char *nvme_iopolicy_names[] = {
	[NVME_IOPOLICY_NUMA]	= "numa",
	[NVME_IOPOLICY_RR]	= "round-robin",
};

static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);

	return sprintf(buf, "%s\n",
			nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}

static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);
	int i;

	for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
		if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
			WRITE_ONCE(subsys->iopolicy, i);
			return count;
		}
	}

	return -EINVAL;
}
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
		      nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);

static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
DEVICE_ATTR_RO(ana_grpid);

static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);

	return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
DEVICE_ATTR_RO(ana_state);

static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	struct nvme_ana_group_desc *dst = data;

	if (desc->grpid != dst->grpid)
		return 0;

	*dst = *desc;
	return -ENXIO; /* just break out of the loop */
}

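/*
 * Called when a namespace is (re)validated: look up its ANA group state
 * (or trigger a log re-read if it is not found) and mark the path live if
 * the state allows I/O.  Without ANA the path is simply treated as
 * optimized.
 */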
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	if (nvme_ctrl_use_ana(ns->ctrl)) {
		struct nvme_ana_group_desc desc = {
			.grpid = id->anagrpid,
			.state = 0,
		};

		mutex_lock(&ns->ctrl->ana_lock);
		ns->ana_grpid = le32_to_cpu(id->anagrpid);
		nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
		mutex_unlock(&ns->ctrl->ana_lock);
		if (desc.state) {
			/* found the group desc: update */
			nvme_update_ns_ana_state(&desc, ns);
		} else {
			/* group desc not found: trigger a re-read */
			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
			queue_work(nvme_wq, &ns->ctrl->ana_work);
		}
	} else {
		ns->ana_state = NVME_ANA_OPTIMIZED;
		nvme_mpath_set_live(ns);
	}

	if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
				   ns->head->disk->queue);
#ifdef CONFIG_BLK_DEV_ZONED
	if (blk_queue_is_zoned(ns->queue) && ns->head->disk)
		ns->head->disk->queue->nr_zones = ns->queue->nr_zones;
#endif
}

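/*
 * Tear down the multipath disk and its request queue, draining any bios
 * that are still parked on the requeue list.
 */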
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	if (head->disk->flags & GENHD_FL_UP)
		del_gendisk(head->disk);
	blk_set_queue_dying(head->disk->queue);
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	blk_cleanup_queue(head->disk->queue);
	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		/*
		 * if device_add_disk wasn't called, prevent the disk release
		 * from putting a bogus reference on the request queue
		 */
		head->disk->queue = NULL;
	}
	put_disk(head->disk);
}

void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
{
	mutex_init(&ctrl->ana_lock);
	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
}

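/*
 * Parse the ANA related Identify Controller fields and (re)allocate the
 * ANA log buffer, then perform an initial log read so the namespace
 * states are known before I/O starts.
 */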
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
	size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
	size_t ana_log_size;
	int error = 0;

	/* check if multipath is enabled and we have the capability */
	if (!multipath || !ctrl->subsys ||
	    !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
		return 0;

	ctrl->anacap = id->anacap;
	ctrl->anatt = id->anatt;
	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);

	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
		ctrl->max_namespaces * sizeof(__le32);
	if (ana_log_size > max_transfer_size) {
		dev_err(ctrl->device,
			"ANA log page size (%zd) larger than MDTS (%zd).\n",
			ana_log_size, max_transfer_size);
		dev_err(ctrl->device, "disabling ANA support.\n");
		goto out_uninit;
	}
	if (ana_log_size > ctrl->ana_log_size) {
		nvme_mpath_stop(ctrl);
		kfree(ctrl->ana_log_buf);
		ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
		if (!ctrl->ana_log_buf)
			return -ENOMEM;
	}
	ctrl->ana_log_size = ana_log_size;
	error = nvme_read_ana_log(ctrl);
	if (error)
		goto out_uninit;
	return 0;

out_uninit:
	nvme_mpath_uninit(ctrl);
	return error;
}

void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
	kfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = NULL;
}