// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics RDMA host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <rdma/mr_pool.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-rdma.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
#include <asm/unaligned.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/nvme-rdma.h>

#include "nvme.h"
#include "fabrics.h"


#define NVME_RDMA_CONNECT_TIMEOUT_MS	3000		/* 3 seconds */

#define NVME_RDMA_MAX_SEGMENTS		256

#define NVME_RDMA_MAX_INLINE_SEGMENTS	4

#define NVME_RDMA_DATA_SGL_SIZE \
	(sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
#define NVME_RDMA_METADATA_SGL_SIZE \
	(sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)

struct nvme_rdma_device {
	struct ib_device	*dev;
	struct ib_pd		*pd;
	struct kref		ref;
	struct list_head	entry;
	unsigned int		num_inline_segments;
};

struct nvme_rdma_qe {
	struct ib_cqe		cqe;
	void			*data;
	u64			dma;
};

struct nvme_rdma_sgl {
	int			nents;
	struct sg_table		sg_table;
};

struct nvme_rdma_queue;
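/*
 * Per-command PDU layout (see nvme_rdma_alloc_tagset() and
 * nvme_rdma_init_request()): the nvme_rdma_request is followed by the
 * inline data SGL and, when the controller supports metadata, by the
 * metadata SGL that metadata_sgl points at.  A rough sketch of what
 * cmd_size covers:
 *
 *	[nvme_rdma_request][data SGL][nvme_rdma_sgl + metadata SGL]
 */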
struct nvme_rdma_request {
	struct nvme_request	req;
	struct ib_mr		*mr;
	struct nvme_rdma_qe	sqe;
	union nvme_result	result;
	__le16			status;
	refcount_t		ref;
	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
	u32			num_sge;
	struct ib_reg_wr	reg_wr;
	struct ib_cqe		reg_cqe;
	struct nvme_rdma_queue	*queue;
	struct nvme_rdma_sgl	data_sgl;
	struct nvme_rdma_sgl	*metadata_sgl;
	bool			use_sig_mr;
};

enum nvme_rdma_queue_flags {
	NVME_RDMA_Q_ALLOCATED		= 0,
	NVME_RDMA_Q_LIVE		= 1,
	NVME_RDMA_Q_TR_READY		= 2,
};

struct nvme_rdma_queue {
	struct nvme_rdma_qe	*rsp_ring;
	int			queue_size;
	size_t			cmnd_capsule_len;
	struct nvme_rdma_ctrl	*ctrl;
	struct nvme_rdma_device	*device;
	struct ib_cq		*ib_cq;
	struct ib_qp		*qp;

	unsigned long		flags;
	struct rdma_cm_id	*cm_id;
	int			cm_error;
	struct completion	cm_done;
	bool			pi_support;
	int			cq_size;
	struct mutex		queue_lock;
};

struct nvme_rdma_ctrl {
	/* read only in the hot path */
	struct nvme_rdma_queue	*queues;

	/* other member variables */
	struct blk_mq_tag_set	tag_set;
	struct work_struct	err_work;

	struct nvme_rdma_qe	async_event_sqe;

	struct delayed_work	reconnect_work;

	struct list_head	list;

	struct blk_mq_tag_set	admin_tag_set;
	struct nvme_rdma_device	*device;

	u32			max_fr_pages;

	struct sockaddr_storage	addr;
	struct sockaddr_storage	src_addr;

	struct nvme_ctrl	ctrl;
	bool			use_inline_data;
	u32			io_queues[HCTX_MAX_TYPES];
};

static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}

static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);

static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);

/*
 * Disabling this option makes small I/O go faster, but is fundamentally
 * unsafe.  With it turned off we will have to register a global rkey that
 * allows read and write access to all physical memory.
 */
static bool register_always = true;
module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
	 "Use memory registration even for contiguous memory regions");

static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvme_rdma_complete_rq(struct request *rq);

static const struct blk_mq_ops nvme_rdma_mq_ops;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops;

static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
{
	return queue - queue->ctrl->queues;
}

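/*
 * Poll queues are allocated last, after the default and read I/O queues
 * (see nvme_rdma_alloc_io_queues()), so a queue index past the
 * default + read ranges identifies a poll queue.
 */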
static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
{
	return nvme_rdma_queue_idx(queue) >
		queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] +
		queue->ctrl->io_queues[HCTX_TYPE_READ];
}

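/*
 * In-capsule data space is whatever the command capsule holds beyond the
 * 64-byte SQE.  I/O queues size the capsule from the controller's IOCCSZ
 * (see nvme_rdma_alloc_queue()); the admin capsule is exactly one
 * nvme_command, so this is 0 for the admin queue.
 */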
static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
{
	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

static void nvme_rdma_free_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
		size_t capsule_size, enum dma_data_direction dir)
{
	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);
	kfree(qe->data);
}

static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
		size_t capsule_size, enum dma_data_direction dir)
{
	qe->data = kzalloc(capsule_size, GFP_KERNEL);
	if (!qe->data)
		return -ENOMEM;

	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
	if (ib_dma_mapping_error(ibdev, qe->dma)) {
		kfree(qe->data);
		qe->data = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void nvme_rdma_free_ring(struct ib_device *ibdev,
		struct nvme_rdma_qe *ring, size_t ib_queue_size,
		size_t capsule_size, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < ib_queue_size; i++)
		nvme_rdma_free_qe(ibdev, &ring[i], capsule_size, dir);
	kfree(ring);
}

static struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev,
		size_t ib_queue_size, size_t capsule_size,
		enum dma_data_direction dir)
{
	struct nvme_rdma_qe *ring;
	int i;

	ring = kcalloc(ib_queue_size, sizeof(struct nvme_rdma_qe), GFP_KERNEL);
	if (!ring)
		return NULL;

	/*
	 * Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue
	 * lifetime. It's safe, since any change in the underlying RDMA device
	 * will issue error recovery and queue re-creation.
	 */
	for (i = 0; i < ib_queue_size; i++) {
		if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir))
			goto out_free_ring;
	}

	return ring;

out_free_ring:
	nvme_rdma_free_ring(ibdev, ring, i, capsule_size, dir);
	return NULL;
}

static void nvme_rdma_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);

}

static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
{
	int ret;

	ret = wait_for_completion_interruptible_timeout(&queue->cm_done,
			msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
	if (ret < 0)
		return ret;
	if (ret == 0)
		return -ETIMEDOUT;
	WARN_ON_ONCE(queue->cm_error > 0);
	return queue->cm_error;
}

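/*
 * Size the send queue for the worst case per request: at the call site
 * factor is send_wr_factor (MR registration, SEND and local-invalidate
 * work requests), plus one extra slot so the drain WR always fits.
 */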
static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
{
	struct nvme_rdma_device *dev = queue->device;
	struct ib_qp_init_attr init_attr;
	int ret;

	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.event_handler = nvme_rdma_qp_event;
	/* +1 for drain */
	init_attr.cap.max_send_wr = factor * queue->queue_size + 1;
	/* +1 for drain */
	init_attr.cap.max_recv_wr = queue->queue_size + 1;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;
	init_attr.send_cq = queue->ib_cq;
	init_attr.recv_cq = queue->ib_cq;
	if (queue->pi_support)
		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
	init_attr.qp_context = queue;

	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);

	queue->qp = queue->cm_id->qp;
	return ret;
}

static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	kfree(req->sqe.data);
}

static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx,
		unsigned int numa_node)
{
	struct nvme_rdma_ctrl *ctrl = set->driver_data;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];

	nvme_req(rq)->ctrl = &ctrl->ctrl;
	req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL);
	if (!req->sqe.data)
		return -ENOMEM;

	/* metadata nvme_rdma_sgl struct is located after command's data SGL */
	if (queue->pi_support)
		req->metadata_sgl = (void *)nvme_req(rq) +
			sizeof(struct nvme_rdma_request) +
			NVME_RDMA_DATA_SGL_SIZE;

	req->queue = queue;

	return 0;
}

static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_rdma_ctrl *ctrl = data;
	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];

	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);

	hctx->driver_data = queue;
	return 0;
}

static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_rdma_ctrl *ctrl = data;
	struct nvme_rdma_queue *queue = &ctrl->queues[0];

	BUG_ON(hctx_idx != 0);

	hctx->driver_data = queue;
	return 0;
}

static void nvme_rdma_free_dev(struct kref *ref)
{
	struct nvme_rdma_device *ndev =
		container_of(ref, struct nvme_rdma_device, ref);

	mutex_lock(&device_list_mutex);
	list_del(&ndev->entry);
	mutex_unlock(&device_list_mutex);

	ib_dealloc_pd(ndev->pd);
	kfree(ndev);
}

static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
{
	kref_put(&dev->ref, nvme_rdma_free_dev);
}

static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
{
	return kref_get_unless_zero(&dev->ref);
}

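/*
 * Look up (or create) the nvme_rdma_device for the CM ID's ib_device,
 * keyed by node GUID under device_list_mutex.  A new entry allocates a
 * PD; unless register_always is set, the PD also exposes the unsafe
 * global rkey so small I/Os can skip per-request memory registration.
 */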
static struct nvme_rdma_device *
nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
	struct nvme_rdma_device *ndev;

	mutex_lock(&device_list_mutex);
	list_for_each_entry(ndev, &device_list, entry) {
		if (ndev->dev->node_guid == cm_id->device->node_guid &&
		    nvme_rdma_dev_get(ndev))
			goto out_unlock;
	}

	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
	if (!ndev)
		goto out_err;

	ndev->dev = cm_id->device;
	kref_init(&ndev->ref);

	ndev->pd = ib_alloc_pd(ndev->dev,
		register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
	if (IS_ERR(ndev->pd))
		goto out_free_dev;

	if (!(ndev->dev->attrs.device_cap_flags &
	      IB_DEVICE_MEM_MGT_EXTENSIONS)) {
		dev_err(&ndev->dev->dev,
			"Memory registrations not supported.\n");
		goto out_free_pd;
	}

	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
					ndev->dev->attrs.max_send_sge - 1);
	list_add(&ndev->entry, &device_list);
out_unlock:
	mutex_unlock(&device_list_mutex);
	return ndev;

out_free_pd:
	ib_dealloc_pd(ndev->pd);
out_free_dev:
	kfree(ndev);
out_err:
	mutex_unlock(&device_list_mutex);
	return NULL;
}

static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
{
	if (nvme_rdma_poll_queue(queue))
		ib_free_cq(queue->ib_cq);
	else
		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
}

static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
	struct nvme_rdma_device *dev;
	struct ib_device *ibdev;

	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
		return;

	dev = queue->device;
	ibdev = dev->dev;

	if (queue->pi_support)
		ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);

	/*
	 * The cm_id object might have been destroyed during RDMA connection
	 * establishment error flow to avoid getting other cma events, thus
	 * the destruction of the QP shouldn't use rdma_cm API.
	 */
	ib_destroy_qp(queue->qp);
	nvme_rdma_free_cq(queue);

	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);

	nvme_rdma_dev_put(dev);
}

static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
	u32 max_page_list_len;

	if (pi_support)
		max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
	else
		max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;

	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}

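/*
 * Polling queues poll the CQ directly from the block layer
 * (IB_POLL_DIRECT) and therefore need a dedicated CQ; all other queues
 * take a shared per-vector CQ from the softirq CQ pool.
 */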
static int nvme_rdma_create_cq(struct ib_device *ibdev,
		struct nvme_rdma_queue *queue)
{
	int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
	enum ib_poll_context poll_ctx;

	/*
	 * Spread I/O queue completion vectors according to their queue index.
	 * The admin queue can always go on completion vector 0.
	 */
	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;

	/* Polling queues need direct cq polling context */
	if (nvme_rdma_poll_queue(queue)) {
		poll_ctx = IB_POLL_DIRECT;
		queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
					   comp_vector, poll_ctx);
	} else {
		poll_ctx = IB_POLL_SOFTIRQ;
		queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
					      comp_vector, poll_ctx);
	}

	if (IS_ERR(queue->ib_cq)) {
		ret = PTR_ERR(queue->ib_cq);
		return ret;
	}

	return 0;
}

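/*
 * Set up all IB resources for a queue: device/PD, CQ, QP, the response
 * ring of DMA-mapped completion buffers, and the MR pool (plus a PI MR
 * pool when metadata is supported).  Everything is released again by
 * nvme_rdma_destroy_queue_ib().
 */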
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
	struct ib_device *ibdev;
	const int send_wr_factor = 3;			/* MR, SEND, INV */
	const int cq_factor = send_wr_factor + 1;	/* + RECV */
	int ret, pages_per_mr;

	queue->device = nvme_rdma_find_get_device(queue->cm_id);
	if (!queue->device) {
		dev_err(queue->cm_id->device->dev.parent,
			"no client data found!\n");
		return -ECONNREFUSED;
	}
	ibdev = queue->device->dev;

	/* +1 for ib_stop_cq */
	queue->cq_size = cq_factor * queue->queue_size + 1;

	ret = nvme_rdma_create_cq(ibdev, queue);
	if (ret)
		goto out_put_dev;

	ret = nvme_rdma_create_qp(queue, send_wr_factor);
	if (ret)
		goto out_destroy_ib_cq;

	queue->rsp_ring = nvme_rdma_alloc_ring(ibdev, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
	if (!queue->rsp_ring) {
		ret = -ENOMEM;
		goto out_destroy_qp;
	}

	/*
	 * Currently we don't use SG_GAPS MR's so if the first entry is
	 * misaligned we'll end up using two entries for a single data page,
	 * so one additional entry is required.
	 */
	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
			      queue->queue_size,
			      IB_MR_TYPE_MEM_REG,
			      pages_per_mr, 0);
	if (ret) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to initialize MR pool sized %d for QID %d\n",
			queue->queue_size, nvme_rdma_queue_idx(queue));
		goto out_destroy_ring;
	}

	if (queue->pi_support) {
		ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
				      queue->queue_size, IB_MR_TYPE_INTEGRITY,
				      pages_per_mr, pages_per_mr);
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"failed to initialize PI MR pool sized %d for QID %d\n",
				queue->queue_size, nvme_rdma_queue_idx(queue));
			goto out_destroy_mr_pool;
		}
	}

	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);

	return 0;

out_destroy_mr_pool:
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
out_destroy_qp:
	rdma_destroy_qp(queue->cm_id);
out_destroy_ib_cq:
	nvme_rdma_free_cq(queue);
out_put_dev:
	nvme_rdma_dev_put(queue->device);
	return ret;
}

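/*
 * Allocate a queue: queue 0 is the admin queue, everything else is an
 * I/O queue.  T10-PI is only enabled on I/O queues, and only when the
 * controller reports support for integrity segments.
 */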
static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
		int idx, size_t queue_size)
{
	struct nvme_rdma_queue *queue;
	struct sockaddr *src_addr = NULL;
	int ret;

	queue = &ctrl->queues[idx];
	mutex_init(&queue->queue_lock);
	queue->ctrl = ctrl;
	if (idx && ctrl->ctrl.max_integrity_segments)
		queue->pi_support = true;
	else
		queue->pi_support = false;
	init_completion(&queue->cm_done);

	if (idx > 0)
		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command);

	queue->queue_size = queue_size;

	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
			RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(queue->cm_id)) {
		dev_info(ctrl->ctrl.device,
			"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
		ret = PTR_ERR(queue->cm_id);
		goto out_destroy_mutex;
	}

	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
		src_addr = (struct sockaddr *)&ctrl->src_addr;

	queue->cm_error = -ETIMEDOUT;
	ret = rdma_resolve_addr(queue->cm_id, src_addr,
			(struct sockaddr *)&ctrl->addr,
			NVME_RDMA_CONNECT_TIMEOUT_MS);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"rdma_resolve_addr failed (%d).\n", ret);
		goto out_destroy_cm_id;
	}

	ret = nvme_rdma_wait_for_cm(queue);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"rdma connection establishment failed (%d)\n", ret);
		goto out_destroy_cm_id;
	}

	set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);

	return 0;

out_destroy_cm_id:
	rdma_destroy_id(queue->cm_id);
	nvme_rdma_destroy_queue_ib(queue);
out_destroy_mutex:
	mutex_destroy(&queue->queue_lock);
	return ret;
}

static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	rdma_disconnect(queue->cm_id);
	ib_drain_qp(queue->qp);
}

static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	mutex_lock(&queue->queue_lock);
	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
		__nvme_rdma_stop_queue(queue);
	mutex_unlock(&queue->queue_lock);
}

static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
		return;

	rdma_destroy_id(queue->cm_id);
	nvme_rdma_destroy_queue_ib(queue);
	mutex_destroy(&queue->queue_lock);
}

static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_rdma_free_queue(&ctrl->queues[i]);
}

static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_rdma_stop_queue(&ctrl->queues[i]);
}

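/*
 * Start a queue by sending the fabrics connect command (admin connect
 * for queue 0, I/O connect otherwise).  Only on success is the queue
 * marked NVME_RDMA_Q_LIVE; on failure an allocated queue is
 * disconnected and its QP drained again.
 */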
static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
{
	struct nvme_rdma_queue *queue = &ctrl->queues[idx];
	bool poll = nvme_rdma_poll_queue(queue);
	int ret;

	if (idx)
		ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll);
	else
		ret = nvmf_connect_admin_queue(&ctrl->ctrl);

	if (!ret) {
		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
	} else {
		if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
			__nvme_rdma_stop_queue(queue);
		dev_info(ctrl->ctrl.device,
			"failed to connect queue: %d ret=%d\n", idx, ret);
	}
	return ret;
}

static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_rdma_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
	}

	return 0;

out_stop_queues:
	for (i--; i >= 1; i--)
		nvme_rdma_stop_queue(&ctrl->queues[i]);
	return ret;
}

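/*
 * Decide how many read, default (write) and poll queues to use, capped
 * by the number of online CPUs and the device's completion vectors,
 * then negotiate the total with the controller.  Dedicated default
 * queues are only handed out once enough read queues are available, and
 * poll queues only get whatever count is left after that.
 */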
static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	struct ib_device *ibdev = ctrl->device->dev;
	unsigned int nr_io_queues, nr_default_queues;
	unsigned int nr_read_queues, nr_poll_queues;
	int i, ret;

	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
				min(opts->nr_io_queues, num_online_cpus()));
	nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors,
				min(opts->nr_write_queues, num_online_cpus()));
	nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
	nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;

	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret)
		return ret;

	if (nr_io_queues == 0) {
		dev_err(ctrl->ctrl.device,
			"unable to set any I/O queues\n");
		return -ENOMEM;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	dev_info(ctrl->ctrl.device,
		"creating %d I/O queues.\n", nr_io_queues);

	if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
		/*
		 * separate read/write queues
		 * hand out dedicated default queues only after we have
		 * sufficient read queues.
		 */
		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
			min(nr_default_queues, nr_io_queues);
		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
	} else {
		/*
		 * shared read/write queues
		 * either no write queues were requested, or we don't have
		 * sufficient queue count to have dedicated default queues.
		 */
		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
			min(nr_read_queues, nr_io_queues);
		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
	}

	if (opts->nr_poll_queues && nr_io_queues) {
		/* map dedicated poll queues only if we have queues left */
		ctrl->io_queues[HCTX_TYPE_POLL] =
			min(nr_poll_queues, nr_io_queues);
	}

	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_rdma_alloc_queue(ctrl, i,
				ctrl->ctrl.sqsize + 1);
		if (ret)
			goto out_free_queues;
	}

	return 0;

out_free_queues:
	for (i--; i >= 1; i--)
		nvme_rdma_free_queue(&ctrl->queues[i]);

	return ret;
}

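/*
 * Build the blk-mq tag set.  Both sets reserve tags for the fabrics
 * connect command (the admin set also for keep-alive), and the
 * per-command PDU is sized to hold the data SGL plus, when the
 * controller supports metadata, the extra metadata SGL as well.
 */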
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) bool admin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) struct blk_mq_tag_set *set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) if (admin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) set = &ctrl->admin_tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) memset(set, 0, sizeof(*set));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) set->ops = &nvme_rdma_admin_mq_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) set->reserved_tags = 2; /* connect + keep-alive */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) set->numa_node = nctrl->numa_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) set->cmd_size = sizeof(struct nvme_rdma_request) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) NVME_RDMA_DATA_SGL_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) set->driver_data = ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) set->nr_hw_queues = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) set->timeout = ADMIN_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) set->flags = BLK_MQ_F_NO_SCHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) set = &ctrl->tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) memset(set, 0, sizeof(*set));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) set->ops = &nvme_rdma_mq_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) set->queue_depth = nctrl->sqsize + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) set->reserved_tags = 1; /* fabric connect */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) set->numa_node = nctrl->numa_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) set->flags = BLK_MQ_F_SHOULD_MERGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) set->cmd_size = sizeof(struct nvme_rdma_request) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) NVME_RDMA_DATA_SGL_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (nctrl->max_integrity_segments)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) set->cmd_size += sizeof(struct nvme_rdma_sgl) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) NVME_RDMA_METADATA_SGL_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) set->driver_data = ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) set->nr_hw_queues = nctrl->queue_count - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) set->timeout = NVME_IO_TIMEOUT;
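/*
 * Two blk-mq maps (default + read) are always set up; a poll map is
 * added only when poll queues were requested.
 */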
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) ret = blk_mq_alloc_tag_set(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) return set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) bool remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (remove) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) blk_cleanup_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) blk_cleanup_queue(ctrl->ctrl.fabrics_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (ctrl->async_event_sqe.data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) cancel_work_sync(&ctrl->ctrl.async_event_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) sizeof(struct nvme_command), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) ctrl->async_event_sqe.data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) nvme_rdma_free_queue(&ctrl->queues[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) bool new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) bool pi_capable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) ctrl->device = ctrl->queues[0].device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) /* T10-PI support */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (ctrl->device->dev->attrs.device_cap_flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) IB_DEVICE_INTEGRITY_HANDOVER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) pi_capable = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) pi_capable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * Bind the async event SQE DMA mapping to the admin queue lifetime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) * It's safe, since any change in the underlying RDMA device will trigger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * error recovery and queue re-creation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) sizeof(struct nvme_command), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) goto out_free_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (new) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if (IS_ERR(ctrl->ctrl.admin_tagset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) error = PTR_ERR(ctrl->ctrl.admin_tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) goto out_free_async_qe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (IS_ERR(ctrl->ctrl.fabrics_q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) error = PTR_ERR(ctrl->ctrl.fabrics_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) goto out_free_tagset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (IS_ERR(ctrl->ctrl.admin_q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) error = PTR_ERR(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) goto out_cleanup_fabrics_q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) error = nvme_rdma_start_queue(ctrl, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) goto out_cleanup_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) error = nvme_enable_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) goto out_stop_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) ctrl->ctrl.max_segments = ctrl->max_fr_pages;
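/*
 * max_fr_pages is in SZ_4K units; shifting by ilog2(SZ_4K) - 9 = 3
 * converts pages to 512-byte sectors.
 */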
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) if (pi_capable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) ctrl->ctrl.max_integrity_segments = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) error = nvme_init_identify(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) goto out_quiesce_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) out_quiesce_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) blk_sync_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) out_stop_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) nvme_rdma_stop_queue(&ctrl->queues[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) nvme_cancel_admin_tagset(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) out_cleanup_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) blk_cleanup_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) out_cleanup_fabrics_q:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) if (new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) blk_cleanup_queue(ctrl->ctrl.fabrics_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) out_free_tagset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) if (new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) out_free_async_qe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (ctrl->async_event_sqe.data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) sizeof(struct nvme_command), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ctrl->async_event_sqe.data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) out_free_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) nvme_rdma_free_queue(&ctrl->queues[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) return error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) bool remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (remove) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) blk_cleanup_queue(ctrl->ctrl.connect_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) blk_mq_free_tag_set(ctrl->ctrl.tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) nvme_rdma_free_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) ret = nvme_rdma_alloc_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (new) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (IS_ERR(ctrl->ctrl.tagset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) ret = PTR_ERR(ctrl->ctrl.tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) goto out_free_io_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) if (IS_ERR(ctrl->ctrl.connect_q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) ret = PTR_ERR(ctrl->ctrl.connect_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) goto out_free_tag_set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) ret = nvme_rdma_start_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) goto out_cleanup_connect_q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) if (!new) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) nvme_start_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * If we timed out waiting for freeze we are likely to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * be stuck. Fail the controller initialization just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * to be safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) goto out_wait_freeze_timed_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) ctrl->ctrl.queue_count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) nvme_unfreeze(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) out_wait_freeze_timed_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) nvme_stop_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) nvme_sync_io_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) nvme_rdma_stop_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) out_cleanup_connect_q:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) nvme_cancel_tagset(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) blk_cleanup_queue(ctrl->ctrl.connect_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) out_free_tag_set:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) blk_mq_free_tag_set(ctrl->ctrl.tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) out_free_io_queues:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) nvme_rdma_free_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) bool remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) blk_sync_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) nvme_rdma_stop_queue(&ctrl->queues[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) if (ctrl->ctrl.admin_tagset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) nvme_cancel_request, &ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) blk_mq_tagset_wait_completed_request(ctrl->ctrl.admin_tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) nvme_rdma_destroy_admin_queue(ctrl, remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
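/*
 * Freeze and quiesce the I/O queues, drain the RDMA queues and fail any
 * requests still held by the tagset; the block queues are only restarted
 * here when the controller is going away for good (remove).
 */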
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) bool remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) if (ctrl->ctrl.queue_count > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) nvme_start_freeze(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) nvme_stop_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) nvme_sync_io_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) nvme_rdma_stop_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) if (ctrl->ctrl.tagset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) nvme_cancel_request, &ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) blk_mq_tagset_wait_completed_request(ctrl->ctrl.tagset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (remove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) nvme_start_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) nvme_rdma_destroy_io_queues(ctrl, remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) if (list_empty(&ctrl->list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) goto free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) mutex_lock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) list_del(&ctrl->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) mutex_unlock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) nvmf_free_options(nctrl->opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) free_ctrl:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) kfree(ctrl->queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) kfree(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) /* If we are resetting/deleting then do nothing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) ctrl->ctrl.state == NVME_CTRL_LIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) if (nvmf_should_reconnect(&ctrl->ctrl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) ctrl->ctrl.opts->reconnect_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) ctrl->ctrl.opts->reconnect_delay * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) nvme_delete_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) bool changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) ret = nvme_rdma_configure_admin_queue(ctrl, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) if (ctrl->ctrl.icdoff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) goto destroy_admin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
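/*
 * SGLS bit 2 advertises support for the keyed SGL data block
 * descriptor, which this transport depends on.
 */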
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) if (!(ctrl->ctrl.sgls & (1 << 2))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) dev_err(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) "Mandatory keyed sgls are not supported!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) goto destroy_admin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) dev_warn(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) "queue_size %zu > ctrl sqsize %u, clamping down\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) dev_warn(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) "sqsize %u > ctrl maxcmd %u, clamping down\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
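/*
 * SGLS bit 20: the SGL address field may specify an in-capsule data
 * offset, which is what makes inline (in-capsule) data possible.
 */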
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) if (ctrl->ctrl.sgls & (1 << 20))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) ctrl->use_inline_data = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) if (ctrl->ctrl.queue_count > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) ret = nvme_rdma_configure_io_queues(ctrl, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) goto destroy_admin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) if (!changed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * state change failure is ok if we started ctrl delete,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) * unless we're in the middle of creating a new controller,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) * in order to avoid races with the teardown flow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) WARN_ON_ONCE(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) goto destroy_io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) nvme_start_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) destroy_io:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (ctrl->ctrl.queue_count > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) nvme_stop_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) nvme_sync_io_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) nvme_rdma_stop_io_queues(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) nvme_cancel_tagset(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) nvme_rdma_destroy_io_queues(ctrl, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) destroy_admin:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) blk_sync_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) nvme_rdma_stop_queue(&ctrl->queues[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) nvme_cancel_admin_tagset(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) nvme_rdma_destroy_admin_queue(ctrl, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) struct nvme_rdma_ctrl, reconnect_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) ++ctrl->ctrl.nr_reconnects;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (nvme_rdma_setup_ctrl(ctrl, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) goto requeue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) ctrl->ctrl.nr_reconnects);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) ctrl->ctrl.nr_reconnects = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) requeue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) ctrl->ctrl.nr_reconnects);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) nvme_rdma_reconnect_or_remove(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
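/*
 * Error recovery: stop keep-alive, tear down the I/O and admin queues
 * (restarting the block queues so anything still queued can be failed
 * over or retried), then move to CONNECTING and either reconnect or
 * remove the controller.
 */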
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) static void nvme_rdma_error_recovery_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) struct nvme_rdma_ctrl *ctrl = container_of(work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) struct nvme_rdma_ctrl, err_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) nvme_stop_keep_alive(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) flush_work(&ctrl->ctrl.async_event_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) nvme_rdma_teardown_io_queues(ctrl, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) nvme_start_queues(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) nvme_rdma_teardown_admin_queue(ctrl, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) /* state change failure is ok if we started ctrl delete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) nvme_rdma_reconnect_or_remove(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) dev_warn(ctrl->ctrl.device, "starting error recovery\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) queue_work(nvme_reset_wq, &ctrl->err_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
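/*
 * Each request holds two completion references (send completion plus
 * receive/invalidate completion); whoever drops the last one completes
 * the block layer request.
 */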
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) static void nvme_rdma_end_request(struct nvme_rdma_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) struct request *rq = blk_mq_rq_from_pdu(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) if (!refcount_dec_and_test(&req->ref))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (!nvme_try_complete_req(rq, req->status, req->result))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) nvme_rdma_complete_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) const char *op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) struct nvme_rdma_queue *queue = wc->qp->qp_context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) struct nvme_rdma_ctrl *ctrl = queue->ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (ctrl->ctrl.state == NVME_CTRL_LIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) dev_info(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) "%s for CQE 0x%p failed with status %s (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) op, wc->wr_cqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ib_wc_status_msg(wc->status), wc->status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) nvme_rdma_error_recovery(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) if (unlikely(wc->status != IB_WC_SUCCESS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) nvme_rdma_wr_error(cq, wc, "MEMREG");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) struct nvme_rdma_request *req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) if (unlikely(wc->status != IB_WC_SUCCESS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) nvme_rdma_end_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
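/*
 * Post a signaled LOCAL_INV work request to invalidate the rkey of the
 * MR registered for this request; completion is reported through
 * nvme_rdma_inv_rkey_done() above.
 */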
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) struct nvme_rdma_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) struct ib_send_wr wr = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) .opcode = IB_WR_LOCAL_INV,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) .next = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) .num_sge = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) .send_flags = IB_SEND_SIGNALED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) .ex.invalidate_rkey = req->mr->rkey,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) req->reg_cqe.done = nvme_rdma_inv_rkey_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) wr.wr_cqe = &req->reg_cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) return ib_post_send(queue->qp, &wr, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) struct nvme_rdma_device *dev = queue->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) struct ib_device *ibdev = dev->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) struct list_head *pool = &queue->qp->rdma_mrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) if (!blk_rq_nr_phys_segments(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (blk_integrity_rq(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) req->metadata_sgl->nents, rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) sg_free_table_chained(&req->metadata_sgl->sg_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) NVME_INLINE_METADATA_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) if (req->use_sig_mr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) pool = &queue->qp->sig_mrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) if (req->mr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) ib_mr_pool_put(queue->qp, pool, req->mr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) req->mr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) static int nvme_rdma_set_sg_null(struct nvme_command *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) sg->addr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) put_unaligned_le24(0, sg->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) put_unaligned_le32(0, sg->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) struct nvme_rdma_request *req, struct nvme_command *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) struct ib_sge *sge = &req->sge[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) struct scatterlist *sgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) u32 len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) sge->addr = sg_dma_address(sgl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) sge->length = sg_dma_len(sgl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) sge->lkey = queue->device->pd->local_dma_lkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) len += sge->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) sge++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)
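/*
 * For in-capsule (inline) data the SGL address carries the in-capsule
 * data offset (the controller's ICDOFF, which this driver requires to
 * be zero) and the descriptor type flags offset-based addressing.
 */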
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) sg->length = cpu_to_le32(len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) req->num_sge += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) struct nvme_rdma_request *req, struct nvme_command *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) struct nvme_rdma_request *req, struct nvme_command *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) if (WARN_ON_ONCE(!req->mr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * Align the MR to a 4K page size to match the ctrl page size and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * the block virtual boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) SZ_4K);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (unlikely(nr < count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) req->mr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (nr < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) req->reg_cqe.done = nvme_rdma_memreg_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) memset(&req->reg_wr, 0, sizeof(req->reg_wr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) req->reg_wr.wr.opcode = IB_WR_REG_MR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) req->reg_wr.wr.wr_cqe = &req->reg_cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) req->reg_wr.wr.num_sge = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) req->reg_wr.mr = req->mr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) req->reg_wr.key = req->mr->rkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) req->reg_wr.access = IB_ACCESS_LOCAL_WRITE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) IB_ACCESS_REMOTE_READ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) IB_ACCESS_REMOTE_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
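/*
 * Describe the freshly registered region to the target;
 * NVME_SGL_FMT_INVALIDATE asks it to remotely invalidate the rkey when
 * it sends the response.
 */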
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) sg->addr = cpu_to_le64(req->mr->iova);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) put_unaligned_le24(req->mr->length, sg->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) put_unaligned_le32(req->mr->rkey, sg->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) NVME_SGL_FMT_INVALIDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) struct nvme_command *cmd, struct ib_sig_domain *domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) u16 control, u8 pi_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) domain->sig_type = IB_SIG_TYPE_T10_DIF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) domain->sig.dif.bg_type = IB_T10DIF_CRC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) domain->sig.dif.pi_interval = 1 << bi->interval_exp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) if (control & NVME_RW_PRINFO_PRCHK_REF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) domain->sig.dif.ref_remap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) domain->sig.dif.app_escape = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) if (pi_type == NVME_NS_DPS_PI_TYPE3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) domain->sig.dif.ref_escape = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) u8 pi_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) u16 control = le16_to_cpu(cmd->rw.control);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) memset(sig_attrs, 0, sizeof(*sig_attrs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) if (control & NVME_RW_PRINFO_PRACT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) /* for WRITE_INSERT/READ_STRIP no memory domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) pi_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) /* Clear the PRACT bit since HCA will generate/verify the PI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) control &= ~NVME_RW_PRINFO_PRACT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) cmd->rw.control = cpu_to_le16(control);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) pi_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) pi_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) *mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) *mask |= IB_SIG_CHECK_REFTAG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) *mask |= IB_SIG_CHECK_GUARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) if (unlikely(wc->status != IB_WC_SUCCESS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) nvme_rdma_wr_error(cq, wc, "SIG");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) struct nvme_rdma_request *req, struct nvme_command *c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) int count, int pi_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) struct nvme_rdma_sgl *sgl = &req->data_sgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct ib_reg_wr *wr = &req->reg_wr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) struct request *rq = blk_mq_rq_from_pdu(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct nvme_ns *ns = rq->q->queuedata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) struct bio *bio = rq->bio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) if (WARN_ON_ONCE(!req->mr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) req->metadata_sgl->sg_table.sgl, pi_count, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) SZ_4K);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (unlikely(nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) goto mr_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) req->mr->sig_attrs, ns->pi_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) req->reg_cqe.done = nvme_rdma_sig_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) memset(wr, 0, sizeof(*wr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) wr->wr.wr_cqe = &req->reg_cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) wr->wr.num_sge = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) wr->wr.send_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) wr->mr = req->mr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) wr->key = req->mr->rkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) wr->access = IB_ACCESS_LOCAL_WRITE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) IB_ACCESS_REMOTE_READ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) IB_ACCESS_REMOTE_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) sg->addr = cpu_to_le64(req->mr->iova);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) put_unaligned_le24(req->mr->length, sg->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) put_unaligned_le32(req->mr->rkey, sg->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) mr_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) req->mr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) if (nr < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) struct request *rq, struct nvme_command *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) struct nvme_rdma_device *dev = queue->device;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) struct ib_device *ibdev = dev->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) int pi_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) int count, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) req->num_sge = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) refcount_set(&req->ref, 2); /* send and recv completions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) c->common.flags |= NVME_CMD_SGL_METABUF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) if (!blk_rq_nr_phys_segments(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) return nvme_rdma_set_sg_null(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) NVME_INLINE_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) req->data_sgl.sg_table.sgl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) req->data_sgl.nents, rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) if (unlikely(count <= 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) goto out_free_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) if (blk_integrity_rq(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) req->metadata_sgl->sg_table.sgl =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) (struct scatterlist *)(req->metadata_sgl + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) blk_rq_count_integrity_sg(rq->q, rq->bio),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) req->metadata_sgl->sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) NVME_INLINE_METADATA_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) goto out_unmap_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) rq->bio, req->metadata_sgl->sg_table.sgl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) pi_count = ib_dma_map_sg(ibdev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) req->metadata_sgl->sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) req->metadata_sgl->nents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) if (unlikely(pi_count <= 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) goto out_free_pi_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
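/*
 * Pick a mapping strategy: PI requests always use a signature MR; small
 * writes on I/O queues may go inline when the controller supports
 * in-capsule data; a single segment can use the PD's unsafe global rkey
 * if one exists; everything else goes through fast-register MRs.
 */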
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) if (req->use_sig_mr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) if (count <= dev->num_inline_segments) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) queue->ctrl->use_inline_data &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) blk_rq_payload_bytes(rq) <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) nvme_rdma_inline_data_size(queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) ret = nvme_rdma_map_sg_inline(queue, req, c, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) ret = nvme_rdma_map_sg_single(queue, req, c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) ret = nvme_rdma_map_sg_fr(queue, req, c, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) goto out_unmap_pi_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) out_unmap_pi_sg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) if (blk_integrity_rq(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) req->metadata_sgl->nents, rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) out_free_pi_table:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (blk_integrity_rq(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) sg_free_table_chained(&req->metadata_sgl->sg_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) NVME_INLINE_METADATA_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) out_unmap_sg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) rq_dma_dir(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) out_free_table:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
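/*
 * Completion handler for the SEND carrying the command capsule: on error
 * kick off error recovery via nvme_rdma_wr_error(), otherwise hand the
 * request to nvme_rdma_end_request().
 */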
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) struct nvme_rdma_qe *qe =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) struct nvme_rdma_request *req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) container_of(qe, struct nvme_rdma_request, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (unlikely(wc->status != IB_WC_SUCCESS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) nvme_rdma_wr_error(cq, wc, "SEND");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) nvme_rdma_end_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
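/*
 * Post the command capsule on the send queue. The SGE always references
 * the pre-mapped SQE buffer; if @first is given (e.g. an MR registration
 * WR) it is chained in front of the SEND so both are posted with a single
 * ib_post_send() call.
 */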
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) struct ib_send_wr *first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) struct ib_send_wr wr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) sge->addr = qe->dma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) sge->length = sizeof(struct nvme_command);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) sge->lkey = queue->device->pd->local_dma_lkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) wr.next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) wr.wr_cqe = &qe->cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) wr.sg_list = sge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) wr.num_sge = num_sge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) wr.opcode = IB_WR_SEND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) wr.send_flags = IB_SEND_SIGNALED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) if (first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) first->next = &wr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) first = &wr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) ret = ib_post_send(queue->qp, first, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) "%s failed with error code %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
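/*
 * Post a single receive buffer sized for an NVMe completion entry.
 */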
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) struct nvme_rdma_qe *qe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) struct ib_recv_wr wr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) struct ib_sge list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) list.addr = qe->dma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) list.length = sizeof(struct nvme_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) list.lkey = queue->device->pd->local_dma_lkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) qe->cqe.done = nvme_rdma_recv_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) wr.next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) wr.wr_cqe = &qe->cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) wr.sg_list = &list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) wr.num_sge = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) ret = ib_post_recv(queue->qp, &wr, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) "%s failed with error code %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) u32 queue_idx = nvme_rdma_queue_idx(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (queue_idx == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) return queue->ctrl->admin_tag_set.tags[queue_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) return queue->ctrl->tag_set.tags[queue_idx - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) if (unlikely(wc->status != IB_WC_SUCCESS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) nvme_rdma_wr_error(cq, wc, "ASYNC");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)
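/*
 * Submit an Asynchronous Event Request on the admin queue. AERs use the
 * pre-allocated async_event_sqe and the reserved NVME_AQ_BLK_MQ_DEPTH
 * command id rather than a struct request.
 */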
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) struct nvme_rdma_queue *queue = &ctrl->queues[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) struct ib_device *dev = queue->device->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct nvme_rdma_qe *sqe = &ctrl->async_event_sqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct nvme_command *cmd = sqe->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) struct ib_sge sge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) memset(cmd, 0, sizeof(*cmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) cmd->common.opcode = nvme_admin_async_event;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) cmd->common.flags |= NVME_CMD_SGL_METABUF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) nvme_rdma_set_sg_null(cmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) sqe->cqe.done = nvme_rdma_async_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) WARN_ON_ONCE(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
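/*
 * Handle an NVMe completion received on @queue: look up the request by
 * command id, record status and result, and make sure the MR rkey gets
 * invalidated (remotely by the target or by a local INV WR) before the
 * request is completed.
 */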
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) struct nvme_completion *cqe, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct request *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) struct nvme_rdma_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) if (!rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) "got bad command_id %#x on QP %#x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) cqe->command_id, queue->qp->qp_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) req->status = cqe->status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) req->result = cqe->result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) if (unlikely(!req->mr ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) wc->ex.invalidate_rkey != req->mr->rkey)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) "Bogus remote invalidation for rkey %#x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) req->mr ? req->mr->rkey : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) } else if (req->mr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) ret = nvme_rdma_inv_rkey(queue, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (unlikely(ret < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) "Queueing INV WR for rkey %#x failed (%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) req->mr->rkey, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) /* the local invalidation completion will end the request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) nvme_rdma_end_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
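/*
 * RECV completion handler: validate the completion, special-case AEN
 * completions, dispatch everything else to nvme_rdma_process_nvme_rsp()
 * and repost the receive buffer.
 */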
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) struct nvme_rdma_qe *qe =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) struct nvme_rdma_queue *queue = wc->qp->qp_context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) struct ib_device *ibdev = queue->device->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) struct nvme_completion *cqe = qe->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) const size_t len = sizeof(struct nvme_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) if (unlikely(wc->status != IB_WC_SUCCESS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) nvme_rdma_wr_error(cq, wc, "RECV");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) /* sanity checking for received data length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) if (unlikely(wc->byte_len < len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) "Unexpected nvme completion length (%d)\n", wc->byte_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * AEN requests are special as they don't time out and can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) * survive any kind of queue freeze and often don't respond to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) * aborts. We don't even bother to allocate a struct request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) * for them but rather special case them here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) cqe->command_id)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) &cqe->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) nvme_rdma_process_nvme_rsp(queue, cqe, wc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) nvme_rdma_post_recv(queue, qe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) int ret, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) for (i = 0; i < queue->queue_size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) struct rdma_cm_event *ev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) struct rdma_cm_id *cm_id = queue->cm_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) int status = ev->status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) const char *rej_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) const struct nvme_rdma_cm_rej *rej_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) u8 rej_data_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) rej_msg = rdma_reject_msg(cm_id, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) if (rej_data && rej_data_len >= sizeof(u16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) u16 sts = le16_to_cpu(rej_data->sts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) "Connect rejected: status %d (%s) nvme status %d (%s).\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) status, rej_msg, sts, nvme_rdma_cm_msg(sts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) "Connect rejected: status %d (%s).\n", status, rej_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) return -ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860)
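/*
 * RDMA_CM_EVENT_ADDR_RESOLVED: create the queue's IB resources and kick
 * off route resolution.
 */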
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) struct nvme_ctrl *ctrl = &queue->ctrl->ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) ret = nvme_rdma_create_queue_ib(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) if (ctrl->opts->tos >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) goto out_destroy_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) out_destroy_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) nvme_rdma_destroy_queue_ib(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
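/*
 * RDMA_CM_EVENT_ROUTE_RESOLVED: build the NVMe/RDMA CM request private
 * data (queue id and host queue sizes) and issue the connect.
 */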
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) struct nvme_rdma_ctrl *ctrl = queue->ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) struct rdma_conn_param param = { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) struct nvme_rdma_cm_req priv = { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) param.qp_num = queue->qp->qp_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) param.flow_control = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) param.responder_resources = queue->device->dev->attrs.max_qp_rd_atom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) /* maximum retry count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) param.retry_count = 7;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) param.rnr_retry_count = 7;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) param.private_data = &priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) param.private_data_len = sizeof(priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) * set the admin queue depth to the minimum size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * specified by the Fabrics standard.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) if (priv.qid == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * The current interpretation of the fabrics spec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) * is that hrqsize must be at least sqsize + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * i.e. the 1's-based representation of sqsize.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) priv.hrqsize = cpu_to_le16(queue->queue_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) ret = rdma_connect_locked(queue->cm_id, &param);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) dev_err(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) "rdma_connect_locked failed (%d).\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)
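/*
 * RDMA CM event handler: drives queue establishment and triggers error
 * recovery on connection loss. cm_done is completed either when the
 * connection is established or when a fatal cm_error is recorded.
 */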
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) struct rdma_cm_event *ev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) struct nvme_rdma_queue *queue = cm_id->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) int cm_error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) dev_dbg(queue->ctrl->ctrl.device, "%s (%d): status %d id %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) rdma_event_msg(ev->event), ev->event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) ev->status, cm_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) switch (ev->event) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) case RDMA_CM_EVENT_ADDR_RESOLVED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) cm_error = nvme_rdma_addr_resolved(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) case RDMA_CM_EVENT_ROUTE_RESOLVED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) cm_error = nvme_rdma_route_resolved(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) case RDMA_CM_EVENT_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) queue->cm_error = nvme_rdma_conn_established(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) /* complete cm_done regardless of success/failure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) complete(&queue->cm_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) case RDMA_CM_EVENT_REJECTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) cm_error = nvme_rdma_conn_rejected(queue, ev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) case RDMA_CM_EVENT_ROUTE_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) case RDMA_CM_EVENT_CONNECT_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) case RDMA_CM_EVENT_UNREACHABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) case RDMA_CM_EVENT_ADDR_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) dev_dbg(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) "CM error event %d\n", ev->event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) cm_error = -ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) case RDMA_CM_EVENT_DISCONNECTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) case RDMA_CM_EVENT_ADDR_CHANGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) case RDMA_CM_EVENT_TIMEWAIT_EXIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) dev_dbg(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) "disconnect received - connection closed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) case RDMA_CM_EVENT_DEVICE_REMOVAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) /* device removal is handled via the ib_client API */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) "Unexpected RDMA CM event (%d)\n", ev->event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) nvme_rdma_error_recovery(queue->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) if (cm_error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) queue->cm_error = cm_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) complete(&queue->cm_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) static void nvme_rdma_complete_timed_out(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) struct nvme_rdma_queue *queue = req->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) nvme_rdma_stop_queue(queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) blk_mq_complete_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) static enum blk_eh_timer_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) nvme_rdma_timeout(struct request *rq, bool reserved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) struct nvme_rdma_queue *queue = req->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) struct nvme_rdma_ctrl *ctrl = queue->ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) rq->tag, nvme_rdma_queue_idx(queue));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) * If we are resetting, connecting or deleting we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) * complete immediately because we may block the controller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) * teardown or setup sequence, e.g.:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) * - ctrl disable/shutdown fabrics requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) * - connect requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) * - initialization admin requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) * - I/O requests that entered after unquiescing and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) * the controller stopped responding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * All other requests should be cancelled by the error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * recovery work, so it's fine that we fail it here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) nvme_rdma_complete_timed_out(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) return BLK_EH_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) * LIVE state should trigger the normal error recovery which will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) * handle completing this request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) nvme_rdma_error_recovery(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) return BLK_EH_RESET_TIMER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)
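/*
 * blk-mq ->queue_rq handler: DMA-map the SQE, set up the NVMe command,
 * map the data (and metadata) buffers and post the send, chaining the MR
 * registration WR when one is needed.
 */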
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) const struct blk_mq_queue_data *bd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) struct nvme_ns *ns = hctx->queue->queuedata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) struct nvme_rdma_queue *queue = hctx->driver_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) struct request *rq = bd->rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) struct nvme_rdma_qe *sqe = &req->sqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) struct nvme_command *c = sqe->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) struct ib_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) blk_status_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) WARN_ON_ONCE(rq->tag < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) dev = queue->device->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) sizeof(struct nvme_command),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) err = ib_dma_mapping_error(dev, req->sqe.dma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (unlikely(err))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) return BLK_STS_RESOURCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) ib_dma_sync_single_for_cpu(dev, sqe->dma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) sizeof(struct nvme_command), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) ret = nvme_setup_cmd(ns, rq, c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) goto unmap_qe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) blk_mq_start_request(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) queue->pi_support &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) (c->common.opcode == nvme_cmd_write ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) c->common.opcode == nvme_cmd_read) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) nvme_ns_has_pi(ns))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) req->use_sig_mr = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) req->use_sig_mr = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) err = nvme_rdma_map_data(queue, rq, c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) if (unlikely(err < 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) dev_err(queue->ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) "Failed to map data (%d)\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) sqe->cqe.done = nvme_rdma_send_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) ib_dma_sync_single_for_device(dev, sqe->dma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) sizeof(struct nvme_command), DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) req->mr ? &req->reg_wr.wr : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) if (unlikely(err))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) goto err_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) return BLK_STS_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) err_unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) nvme_rdma_unmap_data(queue, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (err == -ENOMEM || err == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) ret = BLK_STS_RESOURCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) ret = BLK_STS_IOERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) nvme_cleanup_cmd(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) unmap_qe:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) struct nvme_rdma_queue *queue = hctx->driver_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) return ib_process_cq_direct(queue->ib_cq, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
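/*
 * For requests that used a signature MR, translate T10-PI errors reported
 * by ib_check_mr_status() into the matching NVMe status codes.
 */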
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) struct request *rq = blk_mq_rq_from_pdu(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) struct ib_mr_status mr_status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) pr_err("ib_check_mr_status failed, ret %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) nvme_req(rq)->status = NVME_SC_INVALID_PI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) switch (mr_status.sig_err.err_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) case IB_SIG_BAD_GUARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) case IB_SIG_BAD_REFTAG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) case IB_SIG_BAD_APPTAG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) mr_status.sig_err.err_type, mr_status.sig_err.expected,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) mr_status.sig_err.actual);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) static void nvme_rdma_complete_rq(struct request *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) struct nvme_rdma_queue *queue = req->queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) struct ib_device *ibdev = queue->device->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (req->use_sig_mr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) nvme_rdma_check_pi_status(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) nvme_rdma_unmap_data(queue, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) DMA_TO_DEVICE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) nvme_complete_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
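/*
 * Map blk-mq hardware contexts to the controller's default/read/poll I/O
 * queues, letting blk_mq_rdma_map_queues() spread them according to the
 * RDMA device's affinity.
 */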
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) struct nvme_rdma_ctrl *ctrl = set->driver_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) /* separate read/write queues */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) set->map[HCTX_TYPE_DEFAULT].nr_queues =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) ctrl->io_queues[HCTX_TYPE_DEFAULT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) set->map[HCTX_TYPE_READ].nr_queues =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) ctrl->io_queues[HCTX_TYPE_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) set->map[HCTX_TYPE_READ].queue_offset =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) ctrl->io_queues[HCTX_TYPE_DEFAULT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) /* shared read/write queues */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) set->map[HCTX_TYPE_DEFAULT].nr_queues =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) ctrl->io_queues[HCTX_TYPE_DEFAULT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) set->map[HCTX_TYPE_READ].nr_queues =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) ctrl->io_queues[HCTX_TYPE_DEFAULT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) set->map[HCTX_TYPE_READ].queue_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) ctrl->device->dev, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) ctrl->device->dev, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) /* map dedicated poll queues only if we have queues left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) set->map[HCTX_TYPE_POLL].nr_queues =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) ctrl->io_queues[HCTX_TYPE_POLL];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) set->map[HCTX_TYPE_POLL].queue_offset =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) ctrl->io_queues[HCTX_TYPE_DEFAULT] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) ctrl->io_queues[HCTX_TYPE_READ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) dev_info(ctrl->ctrl.device,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) "mapped %d/%d/%d default/read/poll queues.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) ctrl->io_queues[HCTX_TYPE_DEFAULT],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) ctrl->io_queues[HCTX_TYPE_READ],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) ctrl->io_queues[HCTX_TYPE_POLL]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) static const struct blk_mq_ops nvme_rdma_mq_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) .queue_rq = nvme_rdma_queue_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) .complete = nvme_rdma_complete_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) .init_request = nvme_rdma_init_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) .exit_request = nvme_rdma_exit_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) .init_hctx = nvme_rdma_init_hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) .timeout = nvme_rdma_timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) .map_queues = nvme_rdma_map_queues,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) .poll = nvme_rdma_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) .queue_rq = nvme_rdma_queue_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) .complete = nvme_rdma_complete_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) .init_request = nvme_rdma_init_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) .exit_request = nvme_rdma_exit_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) .init_hctx = nvme_rdma_init_admin_hctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) .timeout = nvme_rdma_timeout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) cancel_work_sync(&ctrl->err_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) cancel_delayed_work_sync(&ctrl->reconnect_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) nvme_rdma_teardown_io_queues(ctrl, shutdown);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (shutdown)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) nvme_shutdown_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) nvme_disable_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) nvme_rdma_teardown_admin_queue(ctrl, shutdown);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) struct nvme_rdma_ctrl *ctrl =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) nvme_stop_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) nvme_rdma_shutdown_ctrl(ctrl, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) /* state change failure should never happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) WARN_ON_ONCE(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) if (nvme_rdma_setup_ctrl(ctrl, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) goto out_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) out_fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) ++ctrl->ctrl.nr_reconnects;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) nvme_rdma_reconnect_or_remove(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) .name = "rdma",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) .module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) .reg_read32 = nvmf_reg_read32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) .reg_read64 = nvmf_reg_read64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) .reg_write32 = nvmf_reg_write32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) .free_ctrl = nvme_rdma_free_ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) .submit_async_event = nvme_rdma_submit_async_event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) .delete_ctrl = nvme_rdma_delete_ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) .get_address = nvmf_get_address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * Fails a connection request if it matches an existing controller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) * (association) with the same tuple:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * If a local address is not specified in the request, it will match an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) * existing controller that has all the other parameters the same and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) * likewise has no local port address specified.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * The ports don't need to be compared as they are intrinsically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) * already matched by the port pointers supplied.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) struct nvme_rdma_ctrl *ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) mutex_lock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) found = nvmf_ip_options_match(&ctrl->ctrl, opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) if (found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) mutex_unlock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) return found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321)
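/*
 * Create and connect a new RDMA controller for the given connect options:
 * parse the target (and optional host) address, reject duplicate
 * associations, allocate the queue array and run the initial setup.
 */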
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) struct nvmf_ctrl_options *opts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) struct nvme_rdma_ctrl *ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) bool changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) if (!ctrl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) ctrl->ctrl.opts = opts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) INIT_LIST_HEAD(&ctrl->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) if (!(opts->mask & NVMF_OPT_TRSVCID)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) opts->trsvcid =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (!opts->trsvcid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) goto out_free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) opts->mask |= NVMF_OPT_TRSVCID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) opts->traddr, opts->trsvcid, &ctrl->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) pr_err("malformed address passed: %s:%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) opts->traddr, opts->trsvcid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) goto out_free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) if (opts->mask & NVMF_OPT_HOST_TRADDR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) opts->host_traddr, NULL, &ctrl->src_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) pr_err("malformed src address passed: %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) opts->host_traddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) goto out_free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) ret = -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) goto out_free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) INIT_DELAYED_WORK(&ctrl->reconnect_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) nvme_rdma_reconnect_ctrl_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) opts->nr_poll_queues + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) ctrl->ctrl.sqsize = opts->queue_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) ctrl->ctrl.kato = opts->kato;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) if (!ctrl->queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) goto out_free_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 0 /* no quirks, we're perfect! */);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) goto out_kfree_queues;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) WARN_ON_ONCE(!changed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) ret = nvme_rdma_setup_ctrl(ctrl, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) goto out_uninit_ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) mutex_lock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) mutex_unlock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) return &ctrl->ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
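/*
 * Error unwind.  Once nvme_init_ctrl() has succeeded, teardown goes
 * through nvme_uninit_ctrl() and the final nvme_put_ctrl(), which is
 * expected to release ctrl and the queues array via the transport's
 * ->free_ctrl callback; before that point the allocations are freed
 * directly.  A positive NVMe status code from setup is translated to
 * -EIO so callers always see a negative errno.
 */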
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) out_uninit_ctrl:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) nvme_uninit_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) nvme_put_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) out_kfree_queues:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) kfree(ctrl->queues);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) out_free_ctrl:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) kfree(ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417)
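/*
 * Fabrics transport definition: the required and optional connect options
 * for the "rdma" transport and the entry point used to create a controller.
 *
 * Illustrative nvme-cli invocation that ends up in nvme_rdma_create_ctrl()
 * (address, port and NQN below are placeholders):
 *
 *   nvme connect -t rdma -a 192.168.1.10 -s 4420 -n nqn.2016-06.io.example:sub0
 */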
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) static struct nvmf_transport_ops nvme_rdma_transport = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) .name = "rdma",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) .module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) .required_opts = NVMF_OPT_TRADDR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) NVMF_OPT_TOS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) .create_ctrl = nvme_rdma_create_ctrl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
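/*
 * IB client ->remove callback: when an RDMA device goes away, delete every
 * controller that is using it and wait for the deletions to complete so no
 * controller outlives the underlying device.
 */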
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) struct nvme_rdma_ctrl *ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) struct nvme_rdma_device *ndev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) mutex_lock(&device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) list_for_each_entry(ndev, &device_list, entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) if (ndev->dev == ib_device) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) mutex_unlock(&device_list_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) if (!found)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) /* Delete all controllers using this device */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) mutex_lock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) if (ctrl->device->dev != ib_device)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) nvme_delete_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) mutex_unlock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) flush_workqueue(nvme_delete_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
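/*
 * Only a ->remove callback is needed; RDMA devices are looked up on demand
 * when a controller sets up its queues, so no ->add callback is registered.
 */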
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) static struct ib_client nvme_rdma_ib_client = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) .name = "nvme_rdma",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) .remove = nvme_rdma_remove_one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
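/*
 * Module init: register with the IB core first so device removal events are
 * delivered, then register the fabrics transport; unwind the IB client
 * registration if the transport registration fails.
 */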
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) static int __init nvme_rdma_init_module(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) ret = ib_register_client(&nvme_rdma_ib_client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) ret = nvmf_register_transport(&nvme_rdma_transport);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) goto err_unreg_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) err_unreg_client:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) ib_unregister_client(&nvme_rdma_ib_client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482)
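/*
 * Module exit: unregister in reverse order, then delete any controllers
 * that are still around and flush the delete workqueue so all teardown
 * work has finished before the module goes away.
 */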
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) static void __exit nvme_rdma_cleanup_module(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) struct nvme_rdma_ctrl *ctrl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) nvmf_unregister_transport(&nvme_rdma_transport);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) ib_unregister_client(&nvme_rdma_ib_client);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) mutex_lock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) nvme_delete_ctrl(&ctrl->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) mutex_unlock(&nvme_rdma_ctrl_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) flush_workqueue(nvme_delete_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) module_init(nvme_rdma_init_module);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) module_exit(nvme_rdma_cleanup_module);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) MODULE_LICENSE("GPL v2");
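/* Human-readable module description reported by modinfo. */
MODULE_DESCRIPTION("NVMe over Fabrics RDMA host driver");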