/*
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/gfp.h>
#include <net/sock.h>
#include <linux/in.h>
#include <linux/list.h>
#include <linux/ratelimit.h>
#include <linux/export.h>
#include <linux/sizes.h>

#include "rds.h"

/* When transmitting messages in rds_send_xmit, we need to emerge from
 * time to time and briefly release the CPU. Otherwise the softlockup watchdog
 * will kick our shin.
 * Also, it seems fairer to not let one busy connection stall all the
 * others.
 *
 * send_batch_count is the number of times we'll loop in send_xmit. Setting
 * it to 0 will restore the old behavior (where we looped until we had
 * drained the queue).
 */
static int send_batch_count = SZ_1K;
module_param(send_batch_count, int, 0444);
MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
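/* The 0444 permission makes this parameter read-only at runtime, so it is
 * chosen at load time, e.g. (assuming the module is built as rds.ko):
 *
 *     modprobe rds send_batch_count=2048
 *
 * and can then be inspected via /sys/module/rds/parameters/send_batch_count.
 */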

static void rds_send_remove_from_sock(struct list_head *messages, int status);

/*
 * Reset the send state. Callers must ensure that this doesn't race with
 * rds_send_xmit().
 */
void rds_send_path_reset(struct rds_conn_path *cp)
{
	struct rds_message *rm, *tmp;
	unsigned long flags;

	if (cp->cp_xmit_rm) {
		rm = cp->cp_xmit_rm;
		cp->cp_xmit_rm = NULL;
		/* Tell the user the RDMA op is no longer mapped by the
		 * transport. This isn't entirely true (it's flushed out
		 * independently) but as the connection is down, there's
		 * no ongoing RDMA to/from that memory */
		rds_message_unmapped(rm);
		rds_message_put(rm);
	}

	cp->cp_xmit_sg = 0;
	cp->cp_xmit_hdr_off = 0;
	cp->cp_xmit_data_off = 0;
	cp->cp_xmit_atomic_sent = 0;
	cp->cp_xmit_rdma_sent = 0;
	cp->cp_xmit_data_sent = 0;

	cp->cp_conn->c_map_queued = 0;

	cp->cp_unacked_packets = rds_sysctl_max_unacked_packets;
	cp->cp_unacked_bytes = rds_sysctl_max_unacked_bytes;

	/* Mark messages as retransmissions, and move them to the send q */
	spin_lock_irqsave(&cp->cp_lock, flags);
	list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) {
		set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
		set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags);
	}
	list_splice_init(&cp->cp_retrans, &cp->cp_send_queue);
	spin_unlock_irqrestore(&cp->cp_lock, flags);
}
EXPORT_SYMBOL_GPL(rds_send_path_reset);

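/* RDS_IN_XMIT serializes transmission on a connection path: only the task
 * that wins this bit may run the body of rds_send_xmit() for the path.
 * The release side clears the bit and wakes anyone sleeping on cp_waitq.
 */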
static int acquire_in_xmit(struct rds_conn_path *cp)
{
	return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0;
}

static void release_in_xmit(struct rds_conn_path *cp)
{
	clear_bit(RDS_IN_XMIT, &cp->cp_flags);
	smp_mb__after_atomic();
	/*
	 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
	 * hot path and finding waiters is very rare. We don't want to walk
	 * the system-wide hashed waitqueue buckets in the fast path only to
	 * almost never find waiters.
	 */
	if (waitqueue_active(&cp->cp_waitq))
		wake_up_all(&cp->cp_waitq);
}

/*
 * We're making the conscious trade-off here to only send one message
 * down the connection at a time.
 *   Pro:
 *      - tx queueing is a simple fifo list
 *      - reassembly is optional and easily done by transports per conn
 *      - no per flow rx lookup at all, straight to the socket
 *      - less per-frag memory and wire overhead
 *   Con:
 *      - queued acks can be delayed behind large messages
 *   Depends:
 *      - small message latency is higher behind queued large messages
 *      - large message latency isn't starved by intervening small sends
 */
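/* Push messages off the send queue (plus any pending congestion map update)
 * down the connection path. We stop when the transport stops making progress,
 * the batch limit is reached, or we appear stuck on a single message.
 * Returns 0 on a normal stop, -ENOMEM if another task already holds
 * RDS_IN_XMIT, -ENETUNREACH if the connection is being torn down, -EAGAIN if
 * we gave up on a stuck message, or whatever error the transport returned.
 */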
int rds_send_xmit(struct rds_conn_path *cp)
{
	struct rds_connection *conn = cp->cp_conn;
	struct rds_message *rm;
	unsigned long flags;
	unsigned int tmp;
	struct scatterlist *sg;
	int ret = 0;
	LIST_HEAD(to_be_dropped);
	int batch_count;
	unsigned long send_gen = 0;
	int same_rm = 0;

restart:
	batch_count = 0;

	/*
	 * sendmsg calls here after having queued its message on the send
	 * queue. We only have one task feeding the connection at a time. If
	 * another thread is already feeding the queue then we back off. This
	 * avoids blocking the caller and trading per-connection data between
	 * caches per message.
	 */
	if (!acquire_in_xmit(cp)) {
		rds_stats_inc(s_send_lock_contention);
		ret = -ENOMEM;
		goto out;
	}

	if (rds_destroy_pending(cp->cp_conn)) {
		release_in_xmit(cp);
		ret = -ENETUNREACH; /* don't requeue send work */
		goto out;
	}

	/*
	 * we record the send generation after doing the xmit acquire.
	 * if someone else manages to jump in and do some work, we'll use
	 * this to avoid a goto restart farther down.
	 *
	 * The acquire_in_xmit() check above ensures that only one
	 * caller can increment c_send_gen at any time.
	 */
	send_gen = READ_ONCE(cp->cp_send_gen) + 1;
	WRITE_ONCE(cp->cp_send_gen, send_gen);

	/*
	 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
	 * we do the opposite to avoid races.
	 */
	if (!rds_conn_path_up(cp)) {
		release_in_xmit(cp);
		ret = 0;
		goto out;
	}

	if (conn->c_trans->xmit_path_prepare)
		conn->c_trans->xmit_path_prepare(cp);

	/*
	 * spin trying to push headers and data down the connection until
	 * the connection doesn't make forward progress.
	 */
	while (1) {

		rm = cp->cp_xmit_rm;

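		/* If we keep landing here with the same message, the
		 * transport isn't making forward progress on it; give up
		 * after a while (and count it) rather than spinning forever.
		 */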
		if (!rm) {
			same_rm = 0;
		} else {
			same_rm++;
			if (same_rm >= 4096) {
				rds_stats_inc(s_send_stuck_rm);
				ret = -EAGAIN;
				break;
			}
		}

		/*
		 * If we are between messages, we can send a pending
		 * congestion map update.
		 */
		if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
			rm = rds_cong_update_alloc(conn);
			if (IS_ERR(rm)) {
				ret = PTR_ERR(rm);
				break;
			}
			rm->data.op_active = 1;
			rm->m_inc.i_conn_path = cp;
			rm->m_inc.i_conn = cp->cp_conn;

			cp->cp_xmit_rm = rm;
		}

		/*
		 * If not already working on one, grab the next message.
		 *
		 * cp_xmit_rm holds a ref while we're sending this message down
		 * the connection. We can use this ref while holding the
		 * send_sem; rds_send_reset() is serialized with it.
		 */
		if (!rm) {
			unsigned int len;

			batch_count++;

			/* we want to process as big a batch as we can, but
			 * we also want to avoid softlockups. If we've been
			 * through a lot of messages, let's back off and see
			 * if anyone else jumps in.
			 */
			if (batch_count >= send_batch_count)
				goto over_batch;

			spin_lock_irqsave(&cp->cp_lock, flags);

			if (!list_empty(&cp->cp_send_queue)) {
				rm = list_entry(cp->cp_send_queue.next,
						struct rds_message,
						m_conn_item);
				rds_message_addref(rm);

				/*
				 * Move the message from the send queue to the retransmit
				 * list right away.
				 */
				list_move_tail(&rm->m_conn_item,
					       &cp->cp_retrans);
			}

			spin_unlock_irqrestore(&cp->cp_lock, flags);

			if (!rm)
				break;

			/* Unfortunately, the way Infiniband deals with
			 * RDMA to a bad MR key is by moving the entire
			 * queue pair to error state. We could possibly
			 * recover from that, but right now we drop the
			 * connection.
			 * Therefore, we never retransmit messages with RDMA ops.
			 */
			if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) ||
			    (rm->rdma.op_active &&
			     test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) {
				spin_lock_irqsave(&cp->cp_lock, flags);
				if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
					list_move(&rm->m_conn_item, &to_be_dropped);
				spin_unlock_irqrestore(&cp->cp_lock, flags);
				continue;
			}

			/* Require an ACK every once in a while */
			len = ntohl(rm->m_inc.i_hdr.h_len);
			if (cp->cp_unacked_packets == 0 ||
			    cp->cp_unacked_bytes < len) {
				set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);

				cp->cp_unacked_packets =
					rds_sysctl_max_unacked_packets;
				cp->cp_unacked_bytes =
					rds_sysctl_max_unacked_bytes;
				rds_stats_inc(s_send_ack_required);
			} else {
				cp->cp_unacked_bytes -= len;
				cp->cp_unacked_packets--;
			}

			cp->cp_xmit_rm = rm;
		}

		/* The transport either sends the whole rdma or none of it */
		if (rm->rdma.op_active && !cp->cp_xmit_rdma_sent) {
			rm->m_final_op = &rm->rdma;
			/* The transport owns the mapped memory for now.
			 * You can't unmap it while it's on the send queue
			 */
			set_bit(RDS_MSG_MAPPED, &rm->m_flags);
			ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
			if (ret) {
				clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
				wake_up_interruptible(&rm->m_flush_wait);
				break;
			}
			cp->cp_xmit_rdma_sent = 1;
		}

		if (rm->atomic.op_active && !cp->cp_xmit_atomic_sent) {
			rm->m_final_op = &rm->atomic;
			/* The transport owns the mapped memory for now.
			 * You can't unmap it while it's on the send queue
			 */
			set_bit(RDS_MSG_MAPPED, &rm->m_flags);
			ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
			if (ret) {
				clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
				wake_up_interruptible(&rm->m_flush_wait);
				break;
			}
			cp->cp_xmit_atomic_sent = 1;
		}

		/*
		 * A number of cases require an RDS header to be sent
		 * even if there is no data.
		 * We permit 0-byte sends; rds-ping depends on this.
		 * However, if there are exclusively attached silent ops,
		 * we skip the hdr/data send, to enable silent operation.
		 */
		if (rm->data.op_nents == 0) {
			int ops_present;
			int all_ops_are_silent = 1;

			ops_present = (rm->atomic.op_active || rm->rdma.op_active);
			if (rm->atomic.op_active && !rm->atomic.op_silent)
				all_ops_are_silent = 0;
			if (rm->rdma.op_active && !rm->rdma.op_silent)
				all_ops_are_silent = 0;

			if (ops_present && all_ops_are_silent
			    && !rm->m_rdma_cookie)
				rm->data.op_active = 0;
		}

		if (rm->data.op_active && !cp->cp_xmit_data_sent) {
			rm->m_final_op = &rm->data;

			ret = conn->c_trans->xmit(conn, rm,
						  cp->cp_xmit_hdr_off,
						  cp->cp_xmit_sg,
						  cp->cp_xmit_data_off);
			if (ret <= 0)
				break;

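			/* ret is the number of bytes the transport consumed;
			 * charge it against the header first, then walk it
			 * through the data scatterlist.
			 */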
			if (cp->cp_xmit_hdr_off < sizeof(struct rds_header)) {
				tmp = min_t(int, ret,
					    sizeof(struct rds_header) -
					    cp->cp_xmit_hdr_off);
				cp->cp_xmit_hdr_off += tmp;
				ret -= tmp;
			}

			sg = &rm->data.op_sg[cp->cp_xmit_sg];
			while (ret) {
				tmp = min_t(int, ret, sg->length -
						      cp->cp_xmit_data_off);
				cp->cp_xmit_data_off += tmp;
				ret -= tmp;
				if (cp->cp_xmit_data_off == sg->length) {
					cp->cp_xmit_data_off = 0;
					sg++;
					cp->cp_xmit_sg++;
					BUG_ON(ret != 0 && cp->cp_xmit_sg ==
					       rm->data.op_nents);
				}
			}

			if (cp->cp_xmit_hdr_off == sizeof(struct rds_header) &&
			    (cp->cp_xmit_sg == rm->data.op_nents))
				cp->cp_xmit_data_sent = 1;
		}

		/*
		 * An rm will only take multiple trips through this loop
		 * if there is a data op. Thus, if the data is sent (or there
		 * was none), then we're done with the rm.
		 */
		if (!rm->data.op_active || cp->cp_xmit_data_sent) {
			cp->cp_xmit_rm = NULL;
			cp->cp_xmit_sg = 0;
			cp->cp_xmit_hdr_off = 0;
			cp->cp_xmit_data_off = 0;
			cp->cp_xmit_rdma_sent = 0;
			cp->cp_xmit_atomic_sent = 0;
			cp->cp_xmit_data_sent = 0;

			rds_message_put(rm);
		}
	}

over_batch:
	if (conn->c_trans->xmit_path_complete)
		conn->c_trans->xmit_path_complete(cp);
	release_in_xmit(cp);

	/* Nuke any messages we decided not to retransmit. */
	if (!list_empty(&to_be_dropped)) {
		/* irqs on here, so we can put(), unlike above */
		list_for_each_entry(rm, &to_be_dropped, m_conn_item)
			rds_message_put(rm);
		rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
	}

	/*
	 * Other senders can queue a message after we last test the send queue
	 * but before we clear RDS_IN_XMIT. In that case they'd back off and
	 * not try to send their newly queued message. We need to check the
	 * send queue after having cleared RDS_IN_XMIT so that their message
	 * doesn't get stuck on the send queue.
	 *
	 * If the transport cannot continue (i.e., ret != 0), then it must
	 * call us when more room is available, such as from the tx
	 * completion handler.
	 *
	 * We have an extra generation check here so that if someone manages
	 * to jump in after our release_in_xmit, we'll see that they have done
	 * some work and we will skip our goto.
	 */
	if (ret == 0) {
		bool raced;

		smp_mb();
		raced = send_gen != READ_ONCE(cp->cp_send_gen);

		if ((test_bit(0, &conn->c_map_queued) ||
		     !list_empty(&cp->cp_send_queue)) && !raced) {
			if (batch_count < send_batch_count)
				goto restart;
			rcu_read_lock();
			if (rds_destroy_pending(cp->cp_conn))
				ret = -ENETUNREACH;
			else
				queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
			rcu_read_unlock();
		} else if (raced) {
			rds_stats_inc(s_send_lock_queue_raced);
		}
	}
out:
	return ret;
}
EXPORT_SYMBOL_GPL(rds_send_xmit);

static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm)
{
	u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);

	assert_spin_locked(&rs->rs_lock);

	BUG_ON(rs->rs_snd_bytes < len);
	rs->rs_snd_bytes -= len;

	if (rs->rs_snd_bytes == 0)
		rds_stats_inc(s_send_queue_empty);
}

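/* A transport may supply its own is_acked callback (the TCP transport does,
 * see the comment above rds_send_path_drop_acked()); otherwise we fall back
 * to comparing the RDS header sequence number against the acked sequence.
 */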
static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
				    is_acked_func is_acked)
{
	if (is_acked)
		return is_acked(rm, ack);
	return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack;
}

/*
 * This is pretty similar to what happens below in the ACK
 * handling code - except that we call here as soon as we get
 * the IB send completion on the RDMA op and the accompanying
 * message.
 */
void rds_rdma_send_complete(struct rds_message *rm, int status)
{
	struct rds_sock *rs = NULL;
	struct rm_rdma_op *ro;
	struct rds_notifier *notifier;
	unsigned long flags;

	spin_lock_irqsave(&rm->m_rs_lock, flags);

	ro = &rm->rdma;
	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
	    ro->op_active && ro->op_notify && ro->op_notifier) {
		notifier = ro->op_notifier;
		rs = rm->m_rs;
		sock_hold(rds_rs_to_sk(rs));

		notifier->n_status = status;
		spin_lock(&rs->rs_lock);
		list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
		spin_unlock(&rs->rs_lock);

		ro->op_notifier = NULL;
	}

	spin_unlock_irqrestore(&rm->m_rs_lock, flags);

	if (rs) {
		rds_wake_sk_sleep(rs);
		sock_put(rds_rs_to_sk(rs));
	}
}
EXPORT_SYMBOL_GPL(rds_rdma_send_complete);

/*
 * Just like above, except looks at atomic op
 */
void rds_atomic_send_complete(struct rds_message *rm, int status)
{
	struct rds_sock *rs = NULL;
	struct rm_atomic_op *ao;
	struct rds_notifier *notifier;
	unsigned long flags;

	spin_lock_irqsave(&rm->m_rs_lock, flags);

	ao = &rm->atomic;
	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
	    && ao->op_active && ao->op_notify && ao->op_notifier) {
		notifier = ao->op_notifier;
		rs = rm->m_rs;
		sock_hold(rds_rs_to_sk(rs));

		notifier->n_status = status;
		spin_lock(&rs->rs_lock);
		list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
		spin_unlock(&rs->rs_lock);

		ao->op_notifier = NULL;
	}

	spin_unlock_irqrestore(&rm->m_rs_lock, flags);

	if (rs) {
		rds_wake_sk_sleep(rs);
		sock_put(rds_rs_to_sk(rs));
	}
}
EXPORT_SYMBOL_GPL(rds_atomic_send_complete);

/*
 * This is the same as rds_rdma_send_complete except we
 * don't do any locking - we have all the ingredients (message,
 * socket, socket lock) and can just move the notifier.
 */
static inline void
__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
{
	struct rm_rdma_op *ro;
	struct rm_atomic_op *ao;

	ro = &rm->rdma;
	if (ro->op_active && ro->op_notify && ro->op_notifier) {
		ro->op_notifier->n_status = status;
		list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
		ro->op_notifier = NULL;
	}

	ao = &rm->atomic;
	if (ao->op_active && ao->op_notify && ao->op_notifier) {
		ao->op_notifier->n_status = status;
		list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
		ao->op_notifier = NULL;
	}

	/* No need to wake the app - caller does this */
}

/*
 * This removes messages from the socket's list if they're on it. The list
 * argument must be private to the caller; we must be able to modify it
 * without locks. The messages must have a reference held for their
 * position on the list. This function will drop that reference after
 * removing the messages from the 'messages' list regardless of whether it
 * found the messages on the socket list or not.
 */
static void rds_send_remove_from_sock(struct list_head *messages, int status)
{
	unsigned long flags;
	struct rds_sock *rs = NULL;
	struct rds_message *rm;

	while (!list_empty(messages)) {
		int was_on_sock = 0;

		rm = list_entry(messages->next, struct rds_message,
				m_conn_item);
		list_del_init(&rm->m_conn_item);

		/*
		 * If we see this flag cleared then we're *sure* that someone
		 * else beat us to removing it from the sock. If we race
		 * with their flag update we'll get the lock and then really
		 * see that the flag has been cleared.
		 *
		 * The message spinlock makes sure nobody clears rm->m_rs
		 * while we're messing with it. It does not prevent the
		 * message from being removed from the socket, though.
		 */
		spin_lock_irqsave(&rm->m_rs_lock, flags);
		if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
			goto unlock_and_drop;

		if (rs != rm->m_rs) {
			if (rs) {
				rds_wake_sk_sleep(rs);
				sock_put(rds_rs_to_sk(rs));
			}
			rs = rm->m_rs;
			if (rs)
				sock_hold(rds_rs_to_sk(rs));
		}
		if (!rs)
			goto unlock_and_drop;
		spin_lock(&rs->rs_lock);

		if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
			struct rm_rdma_op *ro = &rm->rdma;
			struct rds_notifier *notifier;

			list_del_init(&rm->m_sock_item);
			rds_send_sndbuf_remove(rs, rm);

			if (ro->op_active && ro->op_notifier &&
			    (ro->op_notify || (ro->op_recverr && status))) {
				notifier = ro->op_notifier;
				list_add_tail(&notifier->n_list,
					      &rs->rs_notify_queue);
				if (!notifier->n_status)
					notifier->n_status = status;
				rm->rdma.op_notifier = NULL;
			}
			was_on_sock = 1;
		}
		spin_unlock(&rs->rs_lock);

unlock_and_drop:
		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
		rds_message_put(rm);
		if (was_on_sock)
			rds_message_put(rm);
	}

	if (rs) {
		rds_wake_sk_sleep(rs);
		sock_put(rds_rs_to_sk(rs));
	}
}

/*
 * Transports call here when they've determined that the receiver queued
 * messages up to, and including, the given sequence number. Messages are
 * moved to the retrans queue when rds_send_xmit picks them off the send
 * queue. This means that in the TCP case, the message may not have been
 * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked
 * checks the RDS_MSG_HAS_ACK_SEQ bit.
 */
void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
			      is_acked_func is_acked)
{
	struct rds_message *rm, *tmp;
	unsigned long flags;
	LIST_HEAD(list);

	spin_lock_irqsave(&cp->cp_lock, flags);

	list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) {
		if (!rds_send_is_acked(rm, ack, is_acked))
			break;

		list_move(&rm->m_conn_item, &list);
		clear_bit(RDS_MSG_ON_CONN, &rm->m_flags);
	}

	/* order flag updates with spin locks */
	if (!list_empty(&list))
		smp_mb__after_atomic();

	spin_unlock_irqrestore(&cp->cp_lock, flags);

	/* now remove the messages from the sock list as needed */
	rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS);
}
EXPORT_SYMBOL_GPL(rds_send_path_drop_acked);

void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
			 is_acked_func is_acked)
{
	WARN_ON(conn->c_trans->t_mp_capable);
	rds_send_path_drop_acked(&conn->c_path[0], ack, is_acked);
}
EXPORT_SYMBOL_GPL(rds_send_drop_acked);

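/* Drop every message queued on this socket for the given destination (or all
 * queued messages when dest is NULL), completing any attached notifiers with
 * RDS_RDMA_CANCELED, e.g. when sends are cancelled via RDS_CANCEL_SENT_TO.
 */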
void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest)
{
	struct rds_message *rm, *tmp;
	struct rds_connection *conn;
	struct rds_conn_path *cp;
	unsigned long flags;
	LIST_HEAD(list);

	/* get all the messages we're dropping under the rs lock */
	spin_lock_irqsave(&rs->rs_lock, flags);

	list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
		if (dest &&
		    (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) ||
		     dest->sin6_port != rm->m_inc.i_hdr.h_dport))
			continue;

		list_move(&rm->m_sock_item, &list);
		rds_send_sndbuf_remove(rs, rm);
		clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
	}

	/* order flag updates with the rs lock */
	smp_mb__after_atomic();

	spin_unlock_irqrestore(&rs->rs_lock, flags);

	if (list_empty(&list))
		return;

	/* Remove the messages from the conn */
	list_for_each_entry(rm, &list, m_sock_item) {

		conn = rm->m_inc.i_conn;
		if (conn->c_trans->t_mp_capable)
			cp = rm->m_inc.i_conn_path;
		else
			cp = &conn->c_path[0];

		spin_lock_irqsave(&cp->cp_lock, flags);
		/*
		 * Maybe someone else beat us to removing rm from the conn.
		 * If we race with their flag update we'll get the lock and
		 * then really see that the flag has been cleared.
		 */
		if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
			spin_unlock_irqrestore(&cp->cp_lock, flags);
			continue;
		}
		list_del_init(&rm->m_conn_item);
		spin_unlock_irqrestore(&cp->cp_lock, flags);

		/*
		 * Couldn't grab m_rs_lock in top loop (lock ordering),
		 * but we can now.
		 */
		spin_lock_irqsave(&rm->m_rs_lock, flags);

		spin_lock(&rs->rs_lock);
		__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
		spin_unlock(&rs->rs_lock);

		spin_unlock_irqrestore(&rm->m_rs_lock, flags);

		rds_message_put(rm);
	}

	rds_wake_sk_sleep(rs);

	while (!list_empty(&list)) {
		rm = list_entry(list.next, struct rds_message, m_sock_item);
		list_del_init(&rm->m_sock_item);
		rds_message_wait(rm);

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) /* Just in case the code above skipped this message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) * because RDS_MSG_ON_CONN wasn't set, run it again here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) * Taking m_rs_lock is the only thing that keeps us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) * from racing with ack processing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) spin_lock_irqsave(&rm->m_rs_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) spin_lock(&rs->rs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) spin_unlock(&rs->rs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) spin_unlock_irqrestore(&rm->m_rs_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) rds_message_put(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * We only want this to fire once, so we use the caller's 'queued'. It's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * possible that another thread can race with us and remove the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) * message from the flow with RDS_CANCEL_SENT_TO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) struct rds_conn_path *cp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) struct rds_message *rm, __be16 sport,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) __be16 dport, int *queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) if (*queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) /* this is the only place which holds both the socket's rs_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * and the connection's c_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) spin_lock_irqsave(&rs->rs_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) * If there is a little space in sndbuf, we don't queue anything,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) * and userspace gets -EAGAIN. But poll() indicates there's send
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) * room. This can lead to bad behavior (spinning) if snd_bytes isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * freed up by incoming acks. So we check the *old* value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) * rs_snd_bytes here to allow the last msg to exceed the buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * and poll() now knows no more data can be sent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) rs->rs_snd_bytes += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) /* let recv side know we are close to send space exhaustion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * This is probably not the optimal way to do it, as this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * means we set the flag on *all* messages as soon as our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * throughput hits a certain threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) rds_message_addref(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) sock_hold(rds_rs_to_sk(rs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) rm->m_rs = rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) /* The code ordering is a little weird, but we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) trying to minimize the time we hold c_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) rm->m_inc.i_conn = conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) rm->m_inc.i_conn_path = cp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) rds_message_addref(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) spin_lock(&cp->cp_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) rm->m_inc.i_hdr.h_sequence = cpu_to_be64(cp->cp_next_tx_seq++);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) list_add_tail(&rm->m_conn_item, &cp->cp_send_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) spin_unlock(&cp->cp_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) rm, len, rs, rs->rs_snd_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) *queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) spin_unlock_irqrestore(&rs->rs_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return *queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * rds_message is getting to be quite complicated, and we'd like to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * it all in one go. This figures out how big it needs to be up front.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) static int rds_rm_size(struct msghdr *msg, int num_sgs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) struct rds_iov_vector_arr *vct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) struct cmsghdr *cmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) int size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) int cmsg_groups = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) bool zcopy_cookie = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) struct rds_iov_vector *iov, *tmp_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (num_sgs < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) for_each_cmsghdr(cmsg, msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if (!CMSG_OK(msg, cmsg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (cmsg->cmsg_level != SOL_RDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) switch (cmsg->cmsg_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) case RDS_CMSG_RDMA_ARGS:
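/* Each RDS_CMSG_RDMA_ARGS gets its own rds_iov_vector; grow the
 * array if the preallocated slots are used up. */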
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (vct->indx >= vct->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) vct->len += vct->incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) tmp_iov =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) krealloc(vct->vec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) vct->len *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) sizeof(struct rds_iov_vector),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (!tmp_iov) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) vct->len -= vct->incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) vct->vec = tmp_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) iov = &vct->vec[vct->indx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) memset(iov, 0, sizeof(struct rds_iov_vector));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) vct->indx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) cmsg_groups |= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) retval = rds_rdma_extra_size(CMSG_DATA(cmsg), iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) size += retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) case RDS_CMSG_ZCOPY_COOKIE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) zcopy_cookie = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) case RDS_CMSG_RDMA_DEST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) case RDS_CMSG_RDMA_MAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) cmsg_groups |= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) /* these are valid but do not add any size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) case RDS_CMSG_ATOMIC_CSWP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) case RDS_CMSG_ATOMIC_FADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) case RDS_CMSG_MASKED_ATOMIC_CSWP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) case RDS_CMSG_MASKED_ATOMIC_FADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) cmsg_groups |= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) size += sizeof(struct scatterlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
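/* A zerocopy send must carry an RDS_CMSG_ZCOPY_COOKIE so the
 * completion notification can be matched back to the caller. */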
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) size += num_sgs * sizeof(struct scatterlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (cmsg_groups == 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
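/* Copy the user-supplied zerocopy cookie into the message's zerocopy
 * notifier so it can be reported back when the send completes. */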
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) struct cmsghdr *cmsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) u32 *cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) !rm->data.op_mmp_znotifier)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) cookie = CMSG_DATA(cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) rm->data.op_mmp_znotifier->z_cookie = *cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) struct msghdr *msg, int *allocated_mr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) struct rds_iov_vector_arr *vct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) struct cmsghdr *cmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) int ret = 0, ind = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) for_each_cmsghdr(cmsg, msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (!CMSG_OK(msg, cmsg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) if (cmsg->cmsg_level != SOL_RDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) /* As a side effect, RDMA_DEST and RDMA_MAP will set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) switch (cmsg->cmsg_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) case RDS_CMSG_RDMA_ARGS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) if (ind >= vct->indx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) ret = rds_cmsg_rdma_args(rs, rm, cmsg, &vct->vec[ind]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) ind++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) case RDS_CMSG_RDMA_DEST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) ret = rds_cmsg_rdma_dest(rs, rm, cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) case RDS_CMSG_RDMA_MAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) ret = rds_cmsg_rdma_map(rs, rm, cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *allocated_mr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) else if (ret == -ENODEV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) /* Accommodate the get_mr() case, which can fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * if the connection isn't established yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) case RDS_CMSG_ATOMIC_CSWP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) case RDS_CMSG_ATOMIC_FADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) case RDS_CMSG_MASKED_ATOMIC_CSWP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) case RDS_CMSG_MASKED_ATOMIC_FADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) ret = rds_cmsg_atomic(rs, rm, cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) case RDS_CMSG_ZCOPY_COOKIE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) ret = rds_cmsg_zcopy(rs, rm, cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
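/* Pick the connection path to use on a multipath-capable (MPRDS)
 * transport. Until the handshake ping has told us how many paths the
 * peer supports (c_npaths == 0), we may have to fall back to path 0. */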
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) static int rds_send_mprds_hash(struct rds_sock *rs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) struct rds_connection *conn, int nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (conn->c_npaths == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) hash = RDS_MPATH_HASH(rs, RDS_MPATH_WORKERS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) hash = RDS_MPATH_HASH(rs, conn->c_npaths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (conn->c_npaths == 0 && hash != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) rds_send_ping(conn, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) /* The underlying connection is not up yet. Need to wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) * until it is up to be sure that the non-zero c_path can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) * used. But if we are interrupted, we have to use the zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) * c_path in case the connection ends up being non-MP capable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) if (conn->c_npaths == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) /* Cannot wait for the connection to be made, so just use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) * the base c_path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) if (wait_event_interruptible(conn->c_hs_waitq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) conn->c_npaths != 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) hash = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) if (conn->c_npaths == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) hash = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) return hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
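/* Sum the RDMA payload described by any RDS_CMSG_RDMA_ARGS cmsgs so the
 * caller can enforce RDS_MAX_MSG_SIZE on the combined transfer. */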
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) struct rds_rdma_args *args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) struct cmsghdr *cmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) for_each_cmsghdr(cmsg, msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (!CMSG_OK(msg, cmsg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) if (cmsg->cmsg_level != SOL_RDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (cmsg->cmsg_len <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) CMSG_LEN(sizeof(struct rds_rdma_args)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) args = CMSG_DATA(cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) *rdma_bytes += args->remote_vec.bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) struct rds_sock *rs = rds_sk_to_rs(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) __be16 dport;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) struct rds_message *rm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) struct rds_connection *conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) int queued = 0, allocated_mr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) int nonblock = msg->msg_flags & MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) long timeo = sock_sndtimeo(sk, nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) struct rds_conn_path *cpath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) struct in6_addr daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) __u32 scope_id = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) size_t total_payload_len = payload_len, rdma_payload_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) int num_sgs = DIV_ROUND_UP(payload_len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) int namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) struct rds_iov_vector_arr vct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) int ind;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) memset(&vct, 0, sizeof(vct));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) /* Expect one RDMA cmsg per rds_sendmsg; this can still grow if more are needed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) vct.incr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) /* Mirror Linux UDP's handling of BSD error message compatibility */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) /* XXX: Perhaps MSG_MORE someday */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) namelen = msg->msg_namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) if (namelen != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) if (namelen < sizeof(*usin)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) switch (usin->sin_family) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) case AF_INET:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) ipv4_is_multicast(usin->sin_addr.s_addr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) dport = usin->sin_port;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) case AF_INET6: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) int addr_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (namelen < sizeof(*sin6)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) addr_type = ipv6_addr_type(&sin6->sin6_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (!(addr_type & IPV6_ADDR_UNICAST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) __be32 addr4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (!(addr_type & IPV6_ADDR_MAPPED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) /* It is a mapped address. Need to do some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * sanity checks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) addr4 = sin6->sin6_addr.s6_addr32[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) if (addr4 == htonl(INADDR_ANY) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) addr4 == htonl(INADDR_BROADCAST) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) ipv4_is_multicast(addr4)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (addr_type & IPV6_ADDR_LINKLOCAL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (sin6->sin6_scope_id == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) scope_id = sin6->sin6_scope_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) daddr = sin6->sin6_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) dport = sin6->sin6_port;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /* We only care about consistency with ->connect() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) daddr = rs->rs_conn_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) dport = rs->rs_conn_port;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) scope_id = rs->rs_bound_scope_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) ret = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) } else if (namelen != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) /* Cannot send to an IPv4 address using an IPv6 source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) * address and cannot send to an IPv6 address using an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * IPv4 source address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (ipv6_addr_v4mapped(&daddr) ^
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) /* If the socket is already bound to a link local address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * it can only send to peers on the same link. But allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * communicating between link-local and non-link-local addresses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) if (scope_id != rs->rs_bound_scope_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) if (!scope_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) scope_id = rs->rs_bound_scope_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) } else if (rs->rs_bound_scope_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) ret = rds_rdma_bytes(msg, &rdma_payload_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) total_payload_len += rdma_payload_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (max_t(size_t, payload_len, rdma_payload_len) > RDS_MAX_MSG_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ret = -EMSGSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) if (payload_len > rds_sk_sndbuf(rs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) ret = -EMSGSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256)
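/* Zerocopy is only supported on the TCP transport; size the sg list by
 * the number of user pages referenced rather than by payload_len. */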
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) if (zcopy) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) /* size of rm including all sgs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) ret = rds_rm_size(msg, num_sgs, &vct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) rm = rds_message_alloc(ret, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) if (!rm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) /* Attach data to the rm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) if (payload_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (IS_ERR(rm->data.op_sg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) ret = PTR_ERR(rm->data.op_sg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) rm->data.op_active = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) rm->m_daddr = daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) /* rds_conn_create has a spinlock that runs with IRQ off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) * Caching the conn in the socket helps a lot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) rs->rs_tos == rs->rs_conn->c_tos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) conn = rs->rs_conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) conn = rds_conn_create_outgoing(sock_net(sock->sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) &rs->rs_bound_addr, &daddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) rs->rs_transport, rs->rs_tos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) sock->sk->sk_allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) scope_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) if (IS_ERR(conn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) ret = PTR_ERR(conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) rs->rs_conn = conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) if (conn->c_trans->t_mp_capable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) cpath = &conn->c_path[rds_send_mprds_hash(rs, conn, nonblock)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) cpath = &conn->c_path[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) rm->m_conn_path = cpath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) /* Parse any control messages the user may have included. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* Trigger connection so that it's ready for the next retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) rds_conn_connect_if_down(conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) &rm->rdma, conn->c_trans->xmit_rdma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) printk_ratelimited(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) &rm->atomic, conn->c_trans->xmit_atomic);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (rds_destroy_pending(conn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) if (rds_conn_path_down(cpath))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) rds_check_all_paths(conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) rs->rs_seen_congestion = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
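/* Try to queue the message on both the socket and the connection path.
 * If the sndbuf is full, block (unless MSG_DONTWAIT) until space frees
 * up or the send timeout expires. */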
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) while (!rds_send_queue_rm(rs, conn, cpath, rm, rs->rs_bound_port,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) dport, &queued)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) rds_stats_inc(s_send_queue_full);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) if (nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) rds_send_queue_rm(rs, conn, cpath, rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) rs->rs_bound_port,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) dport,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) &queued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) ret = timeo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) ret = -ETIMEDOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * By now we've committed to the send. We reuse rds_send_worker()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * to retry sends in the rds thread if the transport asks us to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) rds_stats_inc(s_send_queued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) ret = rds_send_xmit(cpath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (ret == -ENOMEM || ret == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (rds_destroy_pending(cpath->cp_conn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) ret = -ENETUNREACH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) rds_message_put(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) for (ind = 0; ind < vct.indx; ind++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) kfree(vct.vec[ind].iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) kfree(vct.vec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) return payload_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) for (ind = 0; ind < vct.indx; ind++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) kfree(vct.vec[ind].iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) kfree(vct.vec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) /* If the user included an RDMA_MAP cmsg, we allocated an MR on the fly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) * or in any other way, we need to destroy the MR again. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) if (allocated_mr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) if (rm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) rds_message_put(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * send out a probe. Can be shared by rds_send_ping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) * rds_send_pong, rds_send_hb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * rds_send_hb should use h_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) * RDS_FLAG_HB_PING|RDS_FLAG_ACK_REQUIRED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) * or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) * RDS_FLAG_HB_PONG|RDS_FLAG_ACK_REQUIRED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) rds_send_probe(struct rds_conn_path *cp, __be16 sport,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) __be16 dport, u8 h_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) struct rds_message *rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) rm = rds_message_alloc(0, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) if (!rm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) rm->m_daddr = cp->cp_conn->c_faddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) rm->data.op_active = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) rds_conn_path_connect_if_down(cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) ret = rds_cong_wait(cp->cp_conn->c_fcong, dport, 1, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) spin_lock_irqsave(&cp->cp_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) list_add_tail(&rm->m_conn_item, &cp->cp_send_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) rds_message_addref(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) rm->m_inc.i_conn = cp->cp_conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) rm->m_inc.i_conn_path = cp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) cp->cp_next_tx_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) rm->m_inc.i_hdr.h_flags |= h_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) cp->cp_next_tx_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
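/* For a handshake probe on a multipath-capable transport, advertise
 * how many paths we support and our generation number via header
 * extensions. */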
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) cp->cp_conn->c_trans->t_mp_capable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) rds_message_add_extension(&rm->m_inc.i_hdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) RDS_EXTHDR_NPATHS, &npaths,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) sizeof(npaths));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) rds_message_add_extension(&rm->m_inc.i_hdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) RDS_EXTHDR_GEN_NUM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) &my_gen_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) spin_unlock_irqrestore(&cp->cp_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) rds_stats_inc(s_send_queued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) rds_stats_inc(s_send_pong);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) /* schedule the send work on rds_wq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) if (!rds_destroy_pending(cp->cp_conn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) rds_message_put(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) if (rm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) rds_message_put(rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) rds_send_pong(struct rds_conn_path *cp, __be16 dport)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) return rds_send_probe(cp, 0, dport, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) rds_send_ping(struct rds_connection *conn, int cp_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) struct rds_conn_path *cp = &conn->c_path[cp_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
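/* Only ever send one handshake ping per connection; c_ping_triggered
 * makes later calls no-ops. */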
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) spin_lock_irqsave(&cp->cp_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (conn->c_ping_triggered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) spin_unlock_irqrestore(&cp->cp_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) conn->c_ping_triggered = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) spin_unlock_irqrestore(&cp->cp_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) rds_send_probe(cp, cpu_to_be16(RDS_FLAG_PROBE_PORT), 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) EXPORT_SYMBOL_GPL(rds_send_ping);