// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * rpc_rdma.c
 *
 * This file contains the guts of the RPC RDMA protocol, and
 * does marshaling/unmarshaling, etc. It is also where interfacing
 * to the Linux RPC framework lives.
 */

#include <linux/highmem.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Returns size of largest RPC-over-RDMA header in a Call message
 *
 * The largest Call header contains a full-size Read list and a
 * minimal Reply chunk.
 */
static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Read list size */
	size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);

	/* Minimal Reply chunk size */
	size += sizeof(__be32);	/* segment count */
	size += rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/* Returns size of largest RPC-over-RDMA header in a Reply message
 *
 * There is only one Write list or one Reply chunk per Reply
 * message. The larger list is the Write list.
 */
static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
{
	unsigned int size;

	/* Fixed header fields and list discriminators */
	size = RPCRDMA_HDRLEN_MIN;

	/* Maximum Write list size */
	size += sizeof(__be32);	/* segment count */
	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
	size += sizeof(__be32);	/* list discriminator */

	return size;
}

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
 * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
	unsigned int maxsegs = ep->re_max_rdma_segs;

	ep->re_max_inline_send =
		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
	ep->re_max_inline_recv =
		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
 * plus the RPC call fit under the transport's inline limit. If the
 * combined call message size exceeds that limit, the client must use
 * a Read chunk for this operation.
 *
 * A Read chunk is also required if sending the RPC call inline would
 * exceed this device's max_sge limit.
 */
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
		remaining = xdr->page_len;
		offset = offset_in_page(xdr->page_base);
		count = RPCRDMA_MIN_SEND_SGES;
		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}

	return true;
}

/* The client can't know how large the actual reply will be. Thus it
 * plans for the largest possible reply for that particular ULP
 * operation. If that maximum combined reply message size exceeds the
 * transport's inline threshold, the client must provide a Write list
 * or a Reply chunk for this request.
 */
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
 * size of the non-payload part of the RPC Reply is larger than
 * the inline threshold.
 */
static bool
rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
			  const struct rpc_rqst *rqst)
{
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
		r_xprt->rx_ep->re_max_inline_recv;
}

/* The ACL protocol is lazy about allocating pages. For TCP, these
 * pages can be allocated during receive processing. Not true
 * for RDMA, which must always provision receive buffers
 * up front.
 */
static noinline int
rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
{
	struct page **ppages;
	int len;

	len = buf->page_len;
	ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
	while (len > 0) {
		if (!*ppages)
			*ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
		if (!*ppages)
			return -ENOBUFS;
		ppages++;
		len -= PAGE_SIZE;
	}

	return 0;
}

/* Split @vec on page boundaries into SGEs. FMR registers pages, not
 * a byte range. Other modes coalesce these SGEs into a single MR
 * when they can.
 *
 * Returns pointer to next available SGE, and bumps the total number
 * of SGEs consumed.
 */
static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
		     unsigned int *n)
{
	u32 remaining, page_offset;
	char *base;

	base = vec->iov_base;
	page_offset = offset_in_page(base);
	remaining = vec->iov_len;
	while (remaining) {
		seg->mr_page = NULL;
		seg->mr_offset = base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
		remaining -= seg->mr_len;
		base += seg->mr_len;
		++seg;
		++(*n);
		page_offset = 0;
	}
	return seg;
}

/* Convert @xdrbuf into SGEs no larger than a page each. As they
 * are registered, these SGEs are then coalesced into RDMA segments
 * when the selected memreg mode supports it.
 *
 * Returns positive number of SGEs consumed, or a negative errno.
 */

static int
rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
		     unsigned int pos, enum rpcrdma_chunktype type,
		     struct rpcrdma_mr_seg *seg)
{
	unsigned long page_base;
	unsigned int len, n;
	struct page **ppages;

	n = 0;
	if (pos == 0)
		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);

	len = xdrbuf->page_len;
	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdrbuf->page_base);
	while (len) {
		seg->mr_page = *ppages;
		seg->mr_offset = (char *)page_base;
		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
		len -= seg->mr_len;
		++ppages;
		++seg;
		++n;
		page_base = 0;
	}

	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
	if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
	 * extra segment for non-XDR-aligned Write chunks. The upper
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
	if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);

out:
	if (unlikely(n > RPCRDMA_MAX_SEGS))
		return -EIO;
	return n;
}

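/* Encode one plain RDMA segment: the MR's handle, length, and
 * offset (HLOO) at the current position in the XDR stream.
 */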
static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
	return 0;
}

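/* Encode one Read segment: a list-item discriminator followed by
 * the segment's position, handle, length, and offset.
 */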
static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
		    u32 position)
{
	__be32 *p;

	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
	if (unlikely(!p))
		return -EMSGSIZE;

	*p++ = xdr_one;	/* Item present */
	xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
				mr->mr_offset);
	return 0;
}

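/* Pop an MR from the request's free list, replenishing from the
 * transport's pool if that list is empty, and use it to register
 * as many of the remaining segments as it can handle. Returns a
 * pointer to the first unregistered segment, or an ERR_PTR.
 */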
static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
						 struct rpcrdma_req *req,
						 struct rpcrdma_mr_seg *seg,
						 int nsegs, bool writing,
						 struct rpcrdma_mr **mr)
{
	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
	if (!*mr) {
		*mr = rpcrdma_mr_get(r_xprt);
		if (!*mr)
			goto out_getmr_err;
		trace_xprtrdma_mr_get(req);
		(*mr)->mr_req = req;
	}

	rpcrdma_mr_push(*mr, &req->rl_registered);
	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);

out_getmr_err:
	trace_xprtrdma_nomrs(req);
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	rpcrdma_mrs_refresh(r_xprt);
	return ERR_PTR(-EAGAIN);
}

/* Register and XDR encode the Read list. Supports encoding a list of read
 * segments that belong to a single read chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Read chunklist (a linked list):
 *   N elements, position P (same P for all chunks of same arg!):
 *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single @pos value is currently supported.
 */
static int rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct rpc_rqst *rqst,
				    enum rpcrdma_chunktype rtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	unsigned int pos;
	int nsegs;

	if (rtype == rpcrdma_noch_pullup || rtype == rpcrdma_noch_mapped)
		goto done;

	pos = rqst->rq_snd_buf.head[0].iov_len;
	if (rtype == rpcrdma_areadch)
		pos = 0;
	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
				     rtype, seg);
	if (nsegs < 0)
		return nsegs;

	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_read_segment(xdr, mr, pos) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs);
		r_xprt->rx_stats.read_chunk_count++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Write list. Supports encoding a list
 * containing one array of plain segments that belong to a single
 * write chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Write chunklist (a list of (one) counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO - 0
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 *
 * Only a single Write chunk is currently supported.
 */
static int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req,
				     struct rpc_rqst *rqst,
				     enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_writech)
		goto done;

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
				     rqst->rq_rcv_buf.head[0].iov_len,
				     wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.write_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in this Write chunk */
	*segcount = cpu_to_be32(nchunks);

done:
	if (xdr_stream_encode_item_absent(xdr) < 0)
		return -EMSGSIZE;
	return 0;
}

/* Register and XDR encode the Reply chunk. Supports encoding an array
 * of plain segments that belong to a single write (reply) chunk.
 *
 * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
 *
 *  Reply chunk (a counted array):
 *   N elements:
 *    1 - N - HLOO - HLOO - ... - HLOO
 *
 * Returns zero on success, or a negative errno if a failure occurred.
 * @xdr is advanced to the next position in the stream.
 */
static int rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
				      struct rpcrdma_req *req,
				      struct rpc_rqst *rqst,
				      enum rpcrdma_chunktype wtype)
{
	struct xdr_stream *xdr = &req->rl_stream;
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mr *mr;
	int nsegs, nchunks;
	__be32 *segcount;

	if (wtype != rpcrdma_replych) {
		if (xdr_stream_encode_item_absent(xdr) < 0)
			return -EMSGSIZE;
		return 0;
	}

	seg = req->rl_segments;
	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
	if (nsegs < 0)
		return nsegs;

	if (xdr_stream_encode_item_present(xdr) < 0)
		return -EMSGSIZE;
	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
	if (unlikely(!segcount))
		return -EMSGSIZE;
	/* Actual value encoded below */

	nchunks = 0;
	do {
		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
		if (IS_ERR(seg))
			return PTR_ERR(seg);

		if (encode_rdma_segment(xdr, mr) < 0)
			return -EMSGSIZE;

		trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs);
		r_xprt->rx_stats.reply_chunk_count++;
		r_xprt->rx_stats.total_rdma_request += mr->mr_length;
		nchunks++;
		nsegs -= mr->mr_nents;
	} while (nsegs);

	/* Update count of segments in the Reply chunk */
	*segcount = cpu_to_be32(nchunks);

	return 0;
}

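/* The final reference on this request has just been dropped by the
 * Send unmap path: the matching Reply arrived earlier and was waiting
 * for the Send to complete, so finish Reply processing now.
 */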
static void rpcrdma_sendctx_done(struct kref *kref)
{
	struct rpcrdma_req *req =
		container_of(kref, struct rpcrdma_req, rl_kref);
	struct rpcrdma_rep *rep = req->rl_reply;

	rpcrdma_complete_rqst(rep);
	rep->rr_rxprt->rx_stats.reply_waits_for_send++;
}

/**
 * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
 * @sc: sendctx containing SGEs to unmap
 *
 */
void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_regbuf *rb = sc->sc_req->rl_sendbuf;
	struct ib_sge *sge;

	if (!sc->sc_unmap_count)
		return;

	/* The first two SGEs contain the transport header and
	 * the inline buffer. These are always left mapped so
	 * they can be cheaply re-used.
	 */
	for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
	     ++sge, --sc->sc_unmap_count)
		ib_dma_unmap_page(rdmab_device(rb), sge->addr, sge->length,
				  DMA_TO_DEVICE);

	kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}

/* Prepare an SGE for the RPC-over-RDMA transport header.
 */
static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req, u32 len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
}

/* The head iovec is straightforward, as it is usually already
 * DMA-mapped. Sync the content that has changed.
 */
static bool rpcrdma_prepare_head_iov(struct rpcrdma_xprt *r_xprt,
				     struct rpcrdma_req *req, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
		return false;

	sge->addr = rdmab_addr(rb);
	sge->length = len;
	sge->lkey = rdmab_lkey(rb);

	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
				      DMA_TO_DEVICE);
	return true;
}

/* If there is a page list present, DMA map and prepare an
 * SGE for each page to be sent.
 */
static bool rpcrdma_prepare_pagelist(struct rpcrdma_req *req,
				     struct xdr_buf *xdr)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	unsigned int page_base, len, remaining;
	struct page **ppages;
	struct ib_sge *sge;

	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		sge = &sc->sc_sges[req->rl_wr.num_sge++];
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		sge->addr = ib_dma_map_page(rdmab_device(rb), *ppages,
					    page_base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
			goto out_mapping_err;

		sge->length = len;
		sge->lkey = rdmab_lkey(rb);

		sc->sc_unmap_count++;
		ppages++;
		remaining -= len;
		page_base = 0;
	}

	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* The tail iovec may include an XDR pad for the page list,
 * as well as additional content, and may not reside in the
 * same page as the head iovec.
 */
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
				     struct xdr_buf *xdr,
				     unsigned int page_base, unsigned int len)
{
	struct rpcrdma_sendctx *sc = req->rl_sendctx;
	struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++];
	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
	struct page *page = virt_to_page(xdr->tail[0].iov_base);

	sge->addr = ib_dma_map_page(rdmab_device(rb), page, page_base, len,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdmab_device(rb), sge->addr))
		goto out_mapping_err;

	sge->length = len;
	sge->lkey = rdmab_lkey(rb);
	++sc->sc_unmap_count;
	return true;

out_mapping_err:
	trace_xprtrdma_dma_maperr(sge->addr);
	return false;
}

/* Copy the tail to the end of the head buffer.
 */
static void rpcrdma_pullup_tail_iov(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned char *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len + xdr->page_len;
	memmove(dst, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
	r_xprt->rx_stats.pullup_copy_count += xdr->tail[0].iov_len;
}

/* Copy pagelist content into the head buffer.
 */
static void rpcrdma_pullup_pagelist(struct rpcrdma_xprt *r_xprt,
				    struct rpcrdma_req *req,
				    struct xdr_buf *xdr)
{
	unsigned int len, page_base, remaining;
	struct page **ppages;
	unsigned char *src, *dst;

	dst = (unsigned char *)xdr->head[0].iov_base;
	dst += xdr->head[0].iov_len;
	ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
	page_base = offset_in_page(xdr->page_base);
	remaining = xdr->page_len;
	while (remaining) {
		src = page_address(*ppages);
		src += page_base;
		len = min_t(unsigned int, PAGE_SIZE - page_base, remaining);
		memcpy(dst, src, len);
		r_xprt->rx_stats.pullup_copy_count += len;

		ppages++;
		dst += len;
		remaining -= len;
		page_base = 0;
	}
}

/* Copy the contents of @xdr into @rl_sendbuf and DMA sync it.
 * When the head, pagelist, and tail are small, a pull-up copy
 * is considerably less costly than DMA mapping the components
 * of @xdr.
 *
 * Assumptions:
 *  - the caller has already verified that the total length
 *    of the RPC Call body will fit into @rl_sendbuf.
 */
static bool rpcrdma_prepare_noch_pullup(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	if (unlikely(xdr->tail[0].iov_len))
		rpcrdma_pullup_tail_iov(r_xprt, req, xdr);

	if (unlikely(xdr->page_len))
		rpcrdma_pullup_pagelist(r_xprt, req, xdr);

	/* The whole RPC message resides in the head iovec now */
	return rpcrdma_prepare_head_iov(r_xprt, req, xdr->len);
}

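/* The RPC message is being sent inline without a pull-up copy:
 * DMA map the head iovec, the page list, and the tail iovec of
 * @xdr individually.
 */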
static bool rpcrdma_prepare_noch_mapped(struct rpcrdma_xprt *r_xprt,
					struct rpcrdma_req *req,
					struct xdr_buf *xdr)
{
	struct kvec *tail = &xdr->tail[0];

	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
		return false;
	if (xdr->page_len)
		if (!rpcrdma_prepare_pagelist(req, xdr))
			return false;
	if (tail->iov_len)
		if (!rpcrdma_prepare_tail_iov(req, xdr,
					      offset_in_page(tail->iov_base),
					      tail->iov_len))
			return false;

	if (req->rl_sendctx->sc_unmap_count)
		kref_get(&req->rl_kref);
	return true;
}

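/* The Call's payload is conveyed via a Read chunk. Only the head
 * iovec, and any non-pad bytes in the tail, are sent inline.
 */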
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) struct rpcrdma_req *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) struct xdr_buf *xdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) /* If there is a Read chunk, the page list is being handled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * via explicit RDMA, and thus is skipped here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) /* Do not include the tail if it is only an XDR pad */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) if (xdr->tail[0].iov_len > 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) unsigned int page_base, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) /* If the content in the page list is an odd length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * xdr_write_pages() adds a pad at the beginning of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * the tail iovec. Force the tail's non-pad content to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * land at the next XDR position in the Send message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) page_base = offset_in_page(xdr->tail[0].iov_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) len = xdr->tail[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) page_base += len & 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) len -= len & 3;
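		/* For example, a 7-byte tail is a 3-byte XDR pad followed
		 * by 4 bytes of content: page_base advances past the pad
		 * and len is trimmed to 4, so only the non-pad bytes are
		 * mapped below.
		 */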
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) kref_get(&req->rl_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) * @r_xprt: controlling transport
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) * @req: context of RPC Call being marshalled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) * @hdrlen: size of transport header, in bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) * @xdr: xdr_buf containing RPC Call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * @rtype: chunk type being encoded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) * Returns 0 on success; otherwise a negative errno is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) struct rpcrdma_req *req, u32 hdrlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) struct xdr_buf *xdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) enum rpcrdma_chunktype rtype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) if (!req->rl_sendctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) goto out_nosc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) req->rl_sendctx->sc_unmap_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) req->rl_sendctx->sc_req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) kref_init(&req->rl_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) req->rl_wr.wr_cqe = &req->rl_sendctx->sc_cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) req->rl_wr.sg_list = req->rl_sendctx->sc_sges;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) req->rl_wr.num_sge = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) req->rl_wr.opcode = IB_WR_SEND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) switch (rtype) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) case rpcrdma_noch_pullup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) goto out_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) case rpcrdma_noch_mapped:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) if (!rpcrdma_prepare_noch_mapped(r_xprt, req, xdr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) goto out_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) case rpcrdma_readch:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (!rpcrdma_prepare_readch(r_xprt, req, xdr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) goto out_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) case rpcrdma_areadch:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) goto out_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) out_unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) rpcrdma_sendctx_unmap(req->rl_sendctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) out_nosc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * rpcrdma_marshal_req - Marshal and send one RPC request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) * @r_xprt: controlling transport
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * @rqst: RPC request to be marshaled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * For the RPC in "rqst", this function:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) * - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * - Registers Read, Write, and Reply chunks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * - Constructs the transport header
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) * - Posts a Send WR to send the transport header and request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) * %0 if the RPC was sent successfully,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) * %-ENOTCONN if the connection was lost,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) * %-EAGAIN if the caller should call again with the same arguments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) * %-ENOBUFS if the caller should call again after a delay,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * %-EMSGSIZE if the transport header is too small,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * %-EIO if a permanent problem occurred while marshaling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) struct xdr_stream *xdr = &req->rl_stream;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) enum rpcrdma_chunktype rtype, wtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) struct xdr_buf *buf = &rqst->rq_snd_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) bool ddp_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (unlikely(rqst->rq_rcv_buf.flags & XDRBUF_SPARSE_PAGES)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) ret = rpcrdma_alloc_sparse_pages(&rqst->rq_rcv_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) /* Fixed header fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) ret = -EMSGSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) p = xdr_reserve_space(xdr, 4 * sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) *p++ = rqst->rq_xid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) *p++ = rpcrdma_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) *p++ = r_xprt->rx_buf.rb_max_requests;
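	/* The fourth reserved word is the message type (rdma_msg or
	 * rdma_nomsg); it is filled in below once the chunk types for
	 * this request have been chosen.
	 */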
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) /* When the ULP employs a GSS flavor that guarantees integrity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) * or privacy, direct data placement of individual data items
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) * is not allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) &rqst->rq_cred->cr_auth->au_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * Chunks needed for results?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * o If the expected result is under the inline threshold, all ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * return as inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * o Large read ops return data as write chunk(s), header as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * o Large non-read ops return as a single reply chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) if (rpcrdma_results_inline(r_xprt, rqst))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) wtype = rpcrdma_noch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) rpcrdma_nonpayload_inline(r_xprt, rqst))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) wtype = rpcrdma_writech;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) wtype = rpcrdma_replych;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) * Chunks needed for arguments?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * o If the total request is under the inline threshold, all ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) * are sent as inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) * o Large write ops transmit data as read chunk(s), header as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) * o Large non-write ops are sent with the entire message as a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) * single read chunk (protocol 0-position special case).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) * This assumes that the upper layer never presents a request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) * that carries a data payload and whose non-data arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) * are, by themselves, larger than the inline threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (rpcrdma_args_inline(r_xprt, rqst)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) *p++ = rdma_msg;
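		/* The entire Call is sent inline: if it fits in the
		 * pre-registered send buffer, the page list is pulled up
		 * into the head iovec; otherwise each component of
		 * rq_snd_buf is DMA-mapped separately.
		 */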
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) rtype = buf->len < rdmab_length(req->rl_sendbuf) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) rpcrdma_noch_pullup : rpcrdma_noch_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) } else if (ddp_allowed && buf->flags & XDRBUF_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) *p++ = rdma_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) rtype = rpcrdma_readch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) r_xprt->rx_stats.nomsg_call_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) *p++ = rdma_nomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) rtype = rpcrdma_areadch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) /* This implementation supports the following combinations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * of chunk lists in one RPC-over-RDMA Call message:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * - Read list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * - Write list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * - Reply chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * - Read list + Reply chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * It might not yet support the following combinations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) * - Read list + Write list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * It does not support the following combinations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * - Write list + Reply chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * - Read list + Write list + Reply chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * This implementation supports only a single chunk in each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * Read or Write list. Thus for example the client cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * send a Call message with a Position Zero Read chunk and a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * regular Read chunk at the same time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) buf, rtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) trace_xprtrdma_marshal(req, rtype, wtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) trace_xprtrdma_marshal_failed(rqst, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) r_xprt->rx_stats.failed_marshal_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) frwr_reset(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) static void __rpcrdma_update_cwnd_locked(struct rpc_xprt *xprt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct rpcrdma_buffer *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) u32 grant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) {
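	/* Each credit granted by the server allows one outstanding RPC;
	 * shifting by RPC_CWNDSHIFT converts the credit count into the
	 * RPC layer's congestion window units.
	 */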
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) buf->rb_credits = grant;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) xprt->cwnd = grant << RPC_CWNDSHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) static void rpcrdma_update_cwnd(struct rpcrdma_xprt *r_xprt, u32 grant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) struct rpc_xprt *xprt = &r_xprt->rx_xprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) spin_lock(&xprt->transport_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, grant);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) spin_unlock(&xprt->transport_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * rpcrdma_reset_cwnd - Reset the xprt's congestion window
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * @r_xprt: controlling transport instance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * Prepare @r_xprt for the next connection by reinitializing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * its credit grant to one (see RFC 8166, Section 3.3.3).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) struct rpc_xprt *xprt = &r_xprt->rx_xprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) spin_lock(&xprt->transport_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) xprt->cong = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) __rpcrdma_update_cwnd_locked(xprt, &r_xprt->rx_buf, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) spin_unlock(&xprt->transport_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * rpcrdma_inline_fixup - Scatter inline received data into rqst's iovecs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) * @rqst: controlling RPC request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * @srcp: points to RPC message payload in receive buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * @copy_len: remaining length of receive buffer content
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * @pad: Write chunk pad bytes needed (zero for pure inline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * The upper layer has set the maximum number of bytes it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * receive in each component of rq_rcv_buf. These values are set in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * the head.iov_len, page_len, tail.iov_len, and buflen fields.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * Unlike the TCP equivalent (xdr_partial_copy_from_skb), in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * many cases this function simply updates iov_base pointers in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * rq_rcv_buf to point directly to the received reply data, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * avoid copying reply data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * Returns the number of bytes that had to be copied, rather than redirected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) static unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) unsigned long fixup_copy_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) int i, npages, curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) char *destp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) struct page **ppages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) int page_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) /* The head iovec is redirected to the RPC reply message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) * in the receive buffer, to avoid a memcopy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) rqst->rq_rcv_buf.head[0].iov_base = srcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) rqst->rq_private_buf.head[0].iov_base = srcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) /* The contents of the receive buffer that follow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * head.iov_len bytes are copied into the page list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) */
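	/* No data is copied for the head itself; the code below simply
	 * advances srcp and copy_len past the bytes that the head iovec
	 * now refers to.
	 */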
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) curlen = rqst->rq_rcv_buf.head[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (curlen > copy_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) curlen = copy_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) srcp += curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) copy_len -= curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) ppages = rqst->rq_rcv_buf.pages +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) (rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) fixup_copy_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) if (copy_len && rqst->rq_rcv_buf.page_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) int pagelist_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) pagelist_len = rqst->rq_rcv_buf.page_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) if (pagelist_len > copy_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) pagelist_len = copy_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) npages = PAGE_ALIGN(page_base + pagelist_len) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) for (i = 0; i < npages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) curlen = PAGE_SIZE - page_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) if (curlen > pagelist_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) curlen = pagelist_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) destp = kmap_atomic(ppages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) memcpy(destp + page_base, srcp, curlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) flush_dcache_page(ppages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) kunmap_atomic(destp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) srcp += curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) copy_len -= curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) fixup_copy_count += curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) pagelist_len -= curlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (!pagelist_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) page_base = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) /* Implicit padding for the last segment in a Write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * chunk is inserted inline at the front of the tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * iovec. The upper layer ignores the content of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * the pad. Simply ensure inline content in the tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * that follows the Write chunk is properly aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (pad)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) srcp -= pad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) /* The tail iovec is redirected to the remaining data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * in the receive buffer, to avoid a memcopy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (copy_len || pad) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) rqst->rq_rcv_buf.tail[0].iov_base = srcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) rqst->rq_private_buf.tail[0].iov_base = srcp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) if (fixup_copy_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) trace_xprtrdma_fixup(rqst, fixup_copy_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) return fixup_copy_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) /* By convention, backchannel calls arrive via rdma_msg type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * messages, and never populate the chunk lists. This makes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * the RPC/RDMA header small and fixed in size, so it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * straightforward to check the RPC header's direction field.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) #if defined(CONFIG_SUNRPC_BACKCHANNEL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) struct xdr_stream *xdr = &rep->rr_stream;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) if (rep->rr_proc != rdma_msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) /* Peek at stream contents without advancing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) p = xdr_inline_decode(xdr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) /* Chunk lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) if (xdr_item_is_present(p++))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) if (xdr_item_is_present(p++))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) if (xdr_item_is_present(p++))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /* RPC header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (*p++ != rep->rr_xid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) if (*p != cpu_to_be32(RPC_CALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) /* Now that we are sure this is a backchannel call,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * advance to the RPC header.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) p = xdr_inline_decode(xdr, 3 * sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) goto out_short;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) rpcrdma_bc_receive_call(r_xprt, rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) out_short:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) pr_warn("RPC/RDMA short backward direction call\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) #else /* CONFIG_SUNRPC_BACKCHANNEL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) #endif /* CONFIG_SUNRPC_BACKCHANNEL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) u32 handle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) u64 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
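	/* Each RDMA segment is four XDR words on the wire: a 32-bit
	 * handle, a 32-bit length, and a 64-bit offset.
	 */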
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) p = xdr_inline_decode(xdr, 4 * sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) xdr_decode_rdma_segment(p, &handle, length, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) trace_xprtrdma_decode_seg(handle, *length, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) u32 segcount, seglength;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) p = xdr_inline_decode(xdr, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) *length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) segcount = be32_to_cpup(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) while (segcount--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) if (decode_rdma_segment(xdr, &seglength))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) *length += seglength;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) /* In RPC-over-RDMA Version One replies, a Read list is never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * expected. This decoder is a stub that returns an error if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * a Read list is present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) static int decode_read_list(struct xdr_stream *xdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) p = xdr_inline_decode(xdr, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) if (unlikely(xdr_item_is_present(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /* Supports only one Write chunk in the Write list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) static int decode_write_list(struct xdr_stream *xdr, u32 *length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) u32 chunklen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) bool first;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) *length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) first = true;
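	/* Walk the Write list: an absent item terminates the list, and
	 * a second present chunk is rejected because only one Write
	 * chunk is supported.
	 */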
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) p = xdr_inline_decode(xdr, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (xdr_item_is_absent(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (!first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (decode_write_chunk(xdr, &chunklen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) *length += chunklen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) first = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) } while (true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) p = xdr_inline_decode(xdr, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) *length = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) if (xdr_item_is_present(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) if (decode_write_chunk(xdr, length))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) struct rpc_rqst *rqst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) struct xdr_stream *xdr = &rep->rr_stream;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) u32 writelist, replychunk, rpclen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) char *base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) /* Decode the chunk lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (decode_read_list(xdr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) if (decode_write_list(xdr, &writelist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (decode_reply_chunk(xdr, &replychunk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) /* RDMA_MSG sanity checks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) if (unlikely(replychunk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) /* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) base = (char *)xdr_inline_decode(xdr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) rpclen = xdr_stream_remaining(xdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) r_xprt->rx_stats.fixup_copy_count +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) r_xprt->rx_stats.total_rdma_reply += writelist;
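	/* The reply length seen by the RPC layer is the inline payload
	 * plus the XDR-aligned size of the data the server placed via
	 * the Write chunk.
	 */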
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) return rpclen + xdr_align_size(writelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) static noinline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) struct xdr_stream *xdr = &rep->rr_stream;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) u32 writelist, replychunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) /* Decode the chunk lists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) if (decode_read_list(xdr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (decode_write_list(xdr, &writelist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (decode_reply_chunk(xdr, &replychunk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) /* RDMA_NOMSG sanity checks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) if (unlikely(writelist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (unlikely(!replychunk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /* The Reply chunk buffer is already the reply vector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) r_xprt->rx_stats.total_rdma_reply += replychunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) return replychunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) static noinline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) struct rpc_rqst *rqst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) struct xdr_stream *xdr = &rep->rr_stream;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) p = xdr_inline_decode(xdr, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) switch (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) case err_vers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) p = xdr_inline_decode(xdr, 2 * sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) dprintk("RPC: %s: server reports "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) "version error (%u-%u), xid %08x\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) be32_to_cpup(p), be32_to_cpu(*(p + 1)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) be32_to_cpu(rep->rr_xid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) case err_chunk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) dprintk("RPC: %s: server reports "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) "header decoding error, xid %08x\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) be32_to_cpu(rep->rr_xid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) dprintk("RPC: %s: server reports "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) "unrecognized error %d, xid %08x\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) /* Perform XID lookup, reconstruction of the RPC reply, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) * RPC completion while holding the transport lock to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) * the rep, rqst, and rq_task pointers remain stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) struct rpc_xprt *xprt = &r_xprt->rx_xprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) struct rpc_rqst *rqst = rep->rr_rqst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) int status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) switch (rep->rr_proc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) case rdma_msg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) status = rpcrdma_decode_msg(r_xprt, rep, rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) case rdma_nomsg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) status = rpcrdma_decode_nomsg(r_xprt, rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) case rdma_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) status = rpcrdma_decode_error(r_xprt, rep, rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) status = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (status < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) goto out_badheader;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) spin_lock(&xprt->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) xprt_complete_rqst(rqst->rq_task, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) xprt_unpin_rqst(rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) spin_unlock(&xprt->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) out_badheader:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) trace_xprtrdma_reply_hdr(rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) r_xprt->rx_stats.bad_reply_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) rqst->rq_task->tk_status = status;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) static void rpcrdma_reply_done(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) struct rpcrdma_req *req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) container_of(kref, struct rpcrdma_req, rl_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) rpcrdma_complete_rqst(req->rl_reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * rpcrdma_reply_handler - Process received RPC/RDMA messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) * @rep: Incoming rpcrdma_rep object to process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) * Errors must result in the RPC task either being awakened or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * allowed to time out, so that the error is discovered at that time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) struct rpc_xprt *xprt = &r_xprt->rx_xprt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) struct rpcrdma_req *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) struct rpc_rqst *rqst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) u32 credits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) __be32 *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) /* Any data means we had a useful conversation, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) * there is no need to delay the next reconnect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) if (xprt->reestablish_timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) xprt->reestablish_timeout = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) /* Fixed transport header fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) rep->rr_hdrbuf.head[0].iov_base, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) if (unlikely(!p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) goto out_shortreply;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) rep->rr_xid = *p++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) rep->rr_vers = *p++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) credits = be32_to_cpu(*p++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) rep->rr_proc = *p++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (rep->rr_vers != rpcrdma_version)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) goto out_badversion;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) if (rpcrdma_is_bcall(r_xprt, rep))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) /* Match incoming rpcrdma_rep to an rpcrdma_req to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * get context for handling any incoming chunks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) spin_lock(&xprt->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) if (!rqst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) goto out_norqst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) xprt_pin_rqst(rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) spin_unlock(&xprt->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) if (credits == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) credits = 1; /* don't deadlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) else if (credits > r_xprt->rx_ep->re_max_requests)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) credits = r_xprt->rx_ep->re_max_requests;
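	/* Post enough Receive WRs to cover the new credit grant plus
	 * any expected backchannel traffic.
	 */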
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) if (buf->rb_credits != credits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) rpcrdma_update_cwnd(r_xprt, credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) req = rpcr_to_rdmar(rqst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) if (req->rl_reply) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) rpcrdma_recv_buffer_put(req->rl_reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) req->rl_reply = rep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) rep->rr_rqst = rqst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) frwr_reminv(rep, &req->rl_registered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) if (!list_empty(&req->rl_registered))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) frwr_unmap_async(r_xprt, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) /* LocalInv completion will complete the RPC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) kref_put(&req->rl_kref, rpcrdma_reply_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) out_badversion:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) trace_xprtrdma_reply_vers(rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) out_norqst:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) spin_unlock(&xprt->queue_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) trace_xprtrdma_reply_rqst(rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) out_shortreply:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) trace_xprtrdma_reply_short(rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) rpcrdma_recv_buffer_put(rep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) }