^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #ifndef _RDS_RDS_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #define _RDS_RDS_H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <net/sock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/scatterlist.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <rdma/rdma_cm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/mutex.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/rds.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/rhashtable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/in6.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "info.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * RDS Network protocol version, encoded as (major << 8) | minor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define RDS_PROTOCOL_3_0 0x0300
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define RDS_PROTOCOL_3_1 0x0301
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define RDS_PROTOCOL_4_0 0x0400
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define RDS_PROTOCOL_4_1 0x0401
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) /* everything above the low byte: major number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) /* low byte: minor number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | (min)) /* (min) is parenthesized so a low-precedence argument such as c ? 1 : 0 expands correctly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /* The following ports, 16385, 18634, 18635, are registered with IANA as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * the ports to be used for RDS over TCP and UDP. Currently, only RDS over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * TCP and RDS over IB/RDMA are implemented. 18634 is the historical value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * used for the RDMA_CM listener port. RDS/TCP uses port 16385. After
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * to ensure compatibility with older RDS modules. Those ports are defined
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * in each transport's header file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #define RDS_PORT 18634 /* the historical RDMA_CM listener port described above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #ifdef ATOMIC64_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define KERNEL_HAS_ATOMIC64 /* set only when ATOMIC64_INIT is defined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #ifdef RDS_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) /* sigh, pr_debug() causes unused variable warnings, so without RDS_DEBUG rdsdebug() is an empty, format-checked inline function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) static inline __printf(1, 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) void rdsdebug(const char *fmt, ...) /* const: the format string is never written; existing callers are unaffected */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #define RDS_FRAG_SHIFT 12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) /* 1 << 12 = 4096 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /* Used to limit both RDMA and non-RDMA RDS message to 1MB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #define RDS_MAX_MSG_SIZE ((unsigned int)(1 << 20))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #define RDS_CONG_MAP_BYTES (65536 / 8) /* 65536 bits expressed in bytes (8192); NOTE(review): presumably one bit per 16-bit port - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) /* whole pages needed to hold RDS_CONG_MAP_BYTES */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) /* number of bits in one page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) struct rds_cong_map { /* congestion map; referenced from rds_connection.c_lcong / c_fcong */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) struct rb_node m_rb_node; /* rb-tree linkage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) struct in6_addr m_addr; /* NOTE(review): presumably the address this map describes and the tree key - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) wait_queue_head_t m_waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) struct list_head m_conn_list; /* NOTE(review): presumably links rds_connection.c_map_item entries - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; /* one entry per map page; stored as unsigned long, not as pointers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * This is how we will track the connection state:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * A connection is always in one of the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * states. Updates to the state are atomic and imply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * a memory barrier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) enum { /* anonymous enum: the values are plain ints (rds_conn_path.cp_state is an atomic_t) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) RDS_CONN_DOWN = 0, /* explicitly 0; the enumerators below take 1..5 in declaration order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) RDS_CONN_CONNECTING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) RDS_CONN_DISCONNECTING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) RDS_CONN_UP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) RDS_CONN_RESETTING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) RDS_CONN_ERROR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /* Bits for c_flags. NOTE(review): the only flags word visible in this header is rds_conn_path.cp_flags - confirm the name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) #define RDS_LL_SEND_FULL 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #define RDS_RECONNECT_PENDING 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) #define RDS_IN_XMIT 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) #define RDS_RECV_REFILL 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) #define RDS_DESTROY_PENDING 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /* Max number of multipaths per RDS connection. Must be a power of 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #define RDS_MPATH_WORKERS 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) (rs)->rs_hash_initval) & ((n) - 1)) /* masking with (n) - 1 yields 0..n-1 only when n is a power of 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr)) /* true when htonl(laddr) compares below htonl(faddr) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) /* Per mpath connection state: the cp_xmit_* fields, the send/retransmit lists, cp_state and the per-path work items */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) struct rds_conn_path {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) struct rds_connection *cp_conn; /* NOTE(review): presumably the connection owning this path (see rds_connection.c_path) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) struct rds_message *cp_xmit_rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) unsigned long cp_xmit_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) unsigned int cp_xmit_hdr_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) unsigned int cp_xmit_data_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) unsigned int cp_xmit_atomic_sent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) unsigned int cp_xmit_rdma_sent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) unsigned int cp_xmit_data_sent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) spinlock_t cp_lock; /* protect msg queues */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) u64 cp_next_tx_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) struct list_head cp_send_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) struct list_head cp_retrans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) u64 cp_next_rx_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) void *cp_transport_data; /* untyped pointer; NOTE(review): presumably owned by the transport (rds_connection.c_trans) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) atomic_t cp_state; /* one of the RDS_CONN_* states; see cp_cm_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) unsigned long cp_send_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) unsigned long cp_flags; /* NOTE(review): presumably holds the RDS_LL_SEND_FULL .. RDS_DESTROY_PENDING bits - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) unsigned long cp_reconnect_jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) struct delayed_work cp_send_w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) struct delayed_work cp_recv_w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) struct delayed_work cp_conn_w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) struct work_struct cp_down_w; /* plain work_struct, unlike the three delayed_work items above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) struct mutex cp_cm_lock; /* protect cp_state & cm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) wait_queue_head_t cp_waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) unsigned int cp_unacked_packets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) unsigned int cp_unacked_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) unsigned int cp_index; /* NOTE(review): presumably this path's position within rds_connection.c_path - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) /* One rds_connection per RDS address pair */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) struct rds_connection {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) struct hlist_node c_hash_node; /* hash-list linkage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) struct in6_addr c_laddr; /* the address pair; NOTE(review): presumably l = local, f = foreign - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) struct in6_addr c_faddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) int c_dev_if; /* ifindex used for this conn */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) int c_bound_if; /* ifindex of c_laddr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) unsigned int c_loopback:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) c_isv6:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) c_ping_triggered:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) c_pad_to_32:29; /* 1 + 1 + 1 + 29 = 32 bits in total */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) int c_npaths;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) struct rds_connection *c_passive;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) struct rds_transport *c_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) struct rds_cong_map *c_lcong; /* congestion maps; NOTE(review): presumably for c_laddr and c_faddr respectively - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) struct rds_cong_map *c_fcong;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /* Protocol version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) unsigned int c_proposed_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) unsigned int c_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) possible_net_t c_net; /* read and written through rds_conn_net() / rds_conn_net_set() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) /* TOS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) u8 c_tos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) struct list_head c_map_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) unsigned long c_map_queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) struct rds_conn_path *c_path; /* NOTE(review): presumably an array of per-path state, see c_npaths / RDS_MPATH_WORKERS - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) wait_queue_head_t c_hs_waitq; /* handshake waitq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) u32 c_my_gen_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) u32 c_peer_gen_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) static inline struct net *rds_conn_net(struct rds_connection *conn) /* returns read_pnet() of conn->c_net */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) possible_net_t *holder = &conn->c_net;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) return read_pnet(holder);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) /* stores net into conn->c_net via write_pnet() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) possible_net_t *holder = &conn->c_net;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) write_pnet(holder, net);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) #define RDS_FLAG_CONG_BITMAP 0x01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) #define RDS_FLAG_ACK_REQUIRED 0x02
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) #define RDS_FLAG_RETRANSMITTED 0x04
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) #define RDS_MAX_ADV_CREDIT 255 /* largest value that fits the u8 rds_header.h_credit field */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) /* RDS_FLAG_PROBE_PORT is the reserved sport used for sending a ping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) * probe to exchange control information before establishing a connection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) * Currently the control information that is exchanged is the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) * supported paths. If the peer is a legacy (older kernel revision) peer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) * it would return a pong message without additional control information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * that would then alert the sender that the peer was an older rev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * RDS_HS_PROBE() is true when (sport, dport) is (RDS_FLAG_PROBE_PORT, 0) or (0, RDS_FLAG_PROBE_PORT); its arguments are parenthesized so any expression may be passed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) #define RDS_FLAG_PROBE_PORT 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) #define RDS_HS_PROBE(sport, dport) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) (((sport) == RDS_FLAG_PROBE_PORT && (dport) == 0) || \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) ((sport) == 0 && (dport) == RDS_FLAG_PROBE_PORT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) * Maximum space available for extension headers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) #define RDS_HEADER_EXT_SPACE 16 /* bytes; sizes rds_header.h_exthdr[] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) struct rds_header { /* RDS message header, embedded as rds_incoming.i_hdr; multi-byte fields are big-endian (__be*). NOTE(review): field order presumably defines the on-wire layout - do not reorder without confirming */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) __be64 h_sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) __be64 h_ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) __be32 h_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) __be16 h_sport;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) __be16 h_dport;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) u8 h_flags; /* NOTE(review): presumably the RDS_FLAG_* bits (0x01 / 0x02 / 0x04) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) u8 h_credit; /* u8, so at most 255 (cf. RDS_MAX_ADV_CREDIT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) u8 h_padding[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) __sum16 h_csum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) u8 h_exthdr[RDS_HEADER_EXT_SPACE]; /* extension header area, RDS_HEADER_EXT_SPACE (16) bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * Reserved - indicates end of extensions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) #define RDS_EXTHDR_NONE 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * This extension header is included in the very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * first message that is sent on a new connection,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * and identifies the protocol level. This will help
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * rolling updates if a future change requires breaking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * the protocol.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * NB: This is no longer true for IB, where we do a version
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * negotiation during the connection setup phase (protocol
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) * version information is included in the RDMA CM private data).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) #define RDS_EXTHDR_VERSION 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) struct rds_ext_header_version { /* payload of the RDS_EXTHDR_VERSION extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) __be32 h_version; /* big-endian; NOTE(review): presumably an RDS_PROTOCOL_* value - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * This extension header is included in the RDS message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * chasing an RDMA operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) #define RDS_EXTHDR_RDMA 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) struct rds_ext_header_rdma { /* payload of the RDS_EXTHDR_RDMA extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) __be32 h_rdma_rkey; /* big-endian R_Key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * This extension header tells the peer about the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * destination <R_Key,offset> of the requested RDMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) #define RDS_EXTHDR_RDMA_DEST 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) struct rds_ext_header_rdma_dest { /* payload of the RDS_EXTHDR_RDMA_DEST extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) __be32 h_rdma_rkey; /* the R_Key half of the <R_Key,offset> pair, big-endian */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) __be32 h_rdma_offset; /* the offset half of the pair, big-endian */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /* Extension header announcing number of paths.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * Implicit length = 2 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) #define RDS_EXTHDR_NPATHS 5 /* type 4 is not defined in this part of the header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) #define RDS_EXTHDR_GEN_NUM 6 /* NOTE(review): presumably carries the generation number (cf. rds_connection.c_my_gen_num / c_peer_gen_num) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) #define __RDS_EXTHDR_MAX 16 /* for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) #define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1) /* sizes rds_incoming.i_rx_lat_trace[]; RDS_MSG_RX_DGRAM_TRACE_MAX is defined outside this header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) #define RDS_MSG_RX_HDR 0 /* NOTE(review): RDS_MSG_RX_* presumably index i_rx_lat_trace[] - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) #define RDS_MSG_RX_START 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) #define RDS_MSG_RX_END 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) #define RDS_MSG_RX_CMSG 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /* The following values are whitelisted for usercopy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) struct rds_inc_usercopy { /* embedded as rds_incoming.i_usercopy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) rds_rdma_cookie_t rdma_cookie; /* see rds_rdma_make_cookie() for the packing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) ktime_t rx_tstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) struct rds_incoming { /* a received message; also embedded in rds_message as m_inc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) refcount_t i_refcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) struct list_head i_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) struct rds_connection *i_conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) struct rds_conn_path *i_conn_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) struct rds_header i_hdr; /* the header is stored by value, not by pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) unsigned long i_rx_jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) struct in6_addr i_saddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) struct rds_inc_usercopy i_usercopy; /* the fields whitelisted for usercopy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) struct rds_mr { /* kref-counted, rb-tree linked record with a 32-bit key; NOTE(review): presumably "mr" = RDMA memory region - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) struct rb_node r_rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) struct kref r_kref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) u32 r_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) /* A copy of the creation flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) unsigned int r_use_once:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) unsigned int r_invalidate:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) unsigned int r_write:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) struct rds_sock *r_sock; /* back pointer to the socket that owns us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) struct rds_transport *r_trans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) void *r_trans_private; /* untyped pointer; NOTE(review): presumably interpreted by r_trans - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) /* packs offset into bits 63..32 and r_key into bits 31..0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) return ((u64)offset << 32) | r_key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie) /* low 32 bits of the cookie */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) return (u32)cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) /* bits 63..32 of the cookie */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) return (u32)(cookie >> 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) /* atomic operation types; NOTE(review): presumably the values of rm_atomic_op.op_type - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) #define RDS_ATOMIC_TYPE_CSWP 0 /* NOTE(review): presumably selects the op_m_cswp (compare / swap) operands - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) #define RDS_ATOMIC_TYPE_FADD 1 /* NOTE(review): presumably selects the op_m_fadd (add) operands - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) * m_sock_item and m_conn_item are on lists that are serialized under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) * conn->c_lock. m_sock_item has additional meaning in that once it is empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) * the message will not be put back on the retransmit list after being sent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) * messages that are canceled while being sent rely on this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * m_inc is used by loopback so that it can pass an incoming message straight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) * back up into the rx path. It embeds a wire header which is also used by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * the send path, which is kind of awkward.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * m_sock_item indicates the message's presence on a socket's send or receive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * queue. m_rs will point to that socket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) * m_daddr is used by cancellation to prune messages to a given destination.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) * nesting. As paths iterate over messages on a sock, or conn, they must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) * also lock the conn, or sock, to remove the message from those lists too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * Testing the flag to determine if the message is still on the lists lets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) * us avoid testing the list_head directly. That means each path can use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) * the message's list_head to keep it on a local list while juggling locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) * without confusing the other path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) * m_ack_seq is an optional field set by transports who need a different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) * sequence number range to invalidate. They can use this in a callback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * that they pass to rds_send_drop_acked() to see if each message has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) * had ack_seq set yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) * NOTE(review): struct rds_connection in this header has no c_lock member; the per-path cp_lock ("protect msg queues") may be what "conn->c_lock" means - confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) #define RDS_MSG_ON_SOCK 1 /* NOTE(review): these values look like bit numbers for rds_message.m_flags rather than masks - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) #define RDS_MSG_ON_CONN 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) #define RDS_MSG_HAS_ACK_SEQ 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) #define RDS_MSG_ACK_REQUIRED 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) #define RDS_MSG_RETRANSMITTED 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) #define RDS_MSG_MAPPED 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) #define RDS_MSG_PAGEVEC 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) #define RDS_MSG_FLUSH 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) struct rds_znotifier { /* pairs an mmpin with a 32-bit cookie; pointed to by rm_data_op.op_mmp_znotifier */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) struct mmpin z_mmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) u32 z_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) struct rds_msg_zcopy_info { /* list entry carrying either a notifier or a set of cookies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) struct list_head rs_zcookie_next; /* NOTE(review): presumably linked on rds_msg_zcopy_queue.zcookie_head - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) union { /* anonymous union: the two members share storage, so only one is meaningful at a time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) struct rds_znotifier znotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) struct rds_zcopy_cookies zcookies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) struct rds_msg_zcopy_queue { /* a list head plus its lock; set up by rds_message_zcopy_queue_init() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) struct list_head zcookie_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) spinlock_t lock; /* protects zcookie_head queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q) /* initializes the queue's list head and its spinlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) INIT_LIST_HEAD(&q->zcookie_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) spin_lock_init(&q->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) struct rds_iov_vector { /* a pointer to rds_iovec entries together with a length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) struct rds_iovec *iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) int len; /* NOTE(review): unit (entries vs bytes) is not visible here - confirm against the users */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) struct rds_iov_vector_arr { /* a pointer to rds_iov_vector entries plus bookkeeping integers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) struct rds_iov_vector *vec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) int len; /* NOTE(review): presumably the allocated number of vec entries - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) int indx; /* NOTE(review): presumably the current index into vec - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) int incr; /* NOTE(review): presumably the growth step for vec - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) struct rds_message {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) refcount_t m_refcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) struct list_head m_sock_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) struct list_head m_conn_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) struct rds_incoming m_inc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) u64 m_ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) struct in6_addr m_daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) unsigned long m_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) /* Never access m_rs without holding m_rs_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) * Lock nesting is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) * rm->m_rs_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) * -> rs->rs_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) spinlock_t m_rs_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) wait_queue_head_t m_flush_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) struct rds_sock *m_rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) /* cookie to send to remote, in rds header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) rds_rdma_cookie_t m_rdma_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) unsigned int m_used_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) unsigned int m_total_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) void *m_final_op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) struct {
/*
 * Descriptor for one atomic operation. A pointer to this type is what
 * rds_transport.xmit_atomic() receives. It is embedded in the enclosing
 * anonymous struct as the member "atomic".
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) struct rm_atomic_op {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) int op_type; /* NOTE(review): presumably selects which union arm is valid -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) union { /* the two operand layouts share storage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) uint64_t compare;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) uint64_t swap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) uint64_t compare_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) uint64_t swap_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) } op_m_cswp; /* NOTE(review): looks like masked compare-and-swap operands -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) uint64_t add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) uint64_t nocarry_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) } op_m_fadd; /* NOTE(review): looks like masked fetch-and-add operands -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) u32 op_rkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) u64 op_remote_addr;
/* Single-bit flags; they share one unsigned int storage unit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) unsigned int op_notify:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) unsigned int op_recverr:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) unsigned int op_mapped:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) unsigned int op_silent:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) unsigned int op_active:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) struct scatterlist *op_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) struct rds_notifier *op_notifier; /* see struct rds_notifier; NOTE(review): ownership not visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) struct rds_mr *op_rdma_mr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) } atomic;
/*
 * Descriptor for one RDMA operation. A pointer to this type is what
 * rds_transport.xmit_rdma() receives. It is embedded in the enclosing
 * anonymous struct as the member "rdma".
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) struct rm_rdma_op {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) u32 op_rkey;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) u64 op_remote_addr;
/* Single-bit flags; they share one unsigned int storage unit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) unsigned int op_write:1; /* NOTE(review): presumably write vs. read direction -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) unsigned int op_fence:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) unsigned int op_notify:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) unsigned int op_recverr:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) unsigned int op_mapped:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) unsigned int op_silent:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) unsigned int op_active:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) unsigned int op_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) unsigned int op_nents; /* NOTE(review): presumably number of entries in op_sg -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) unsigned int op_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) struct scatterlist *op_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) struct rds_notifier *op_notifier; /* see struct rds_notifier; NOTE(review): ownership not visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) struct rds_mr *op_rdma_mr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472)
/* NOTE(review): the two fields below look ODP (on-demand paging) related -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) u64 op_odp_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) struct rds_mr *op_odp_mr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) } rdma;
/*
 * Descriptor for the data payload part of a message. It is embedded in
 * the enclosing anonymous struct as the member "data".
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) struct rm_data_op {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) unsigned int op_active:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) unsigned int op_nents; /* NOTE(review): presumably number of entries in op_sg -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) unsigned int op_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) unsigned int op_dmasg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) unsigned int op_dmaoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) struct rds_znotifier *op_mmp_znotifier; /* NOTE(review): looks zerocopy-related -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) struct scatterlist *op_sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) } data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) struct rds_conn_path *m_conn_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) * The RDS notifier is used (optionally) to tell the application about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) * completed RDMA operations. Rather than keeping the whole rds message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) * around on the queue, we allocate a small notifier that is put on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) * socket's notifier_list. Notifications are delivered to the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) * through control messages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) struct rds_notifier {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) struct list_head n_list; /* list linkage; NOTE(review): presumably queued on rds_sock.rs_notify_queue -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) uint64_t n_user_token; /* NOTE(review): presumably an opaque value supplied by the application -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) int n_status; /* NOTE(review): completion status reported to the application; encoding not visible here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /* Available as part of RDS core, so doesn't need to participate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) * in get_preferred transport etc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) #define RDS_TRANS_LOOP 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * struct rds_transport - transport specific behavioural hooks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * part of a message. The caller serializes on the send_sem so this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * doesn't need to be reentrant for a given conn. The header must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) * sent before the data payload. .xmit must be prepared to send a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) * message with no data payload. .xmit should return the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) * bytes that were sent down the connection, including header bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * Returning 0 tells the caller that it doesn't need to perform any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * additional work now. This is usually the case when the transport has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) * filled the sending queue for its connection and will handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) * triggering the rds thread to continue the send when space becomes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * available. Returning -EAGAIN tells the caller to retry the send
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) * immediately. Returning -ENOMEM tells the caller to retry the send at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) * some point in the future.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) * @conn_path_shutdown: conn_path_shutdown stops traffic on the given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) * connection. Once it returns the connection can not call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) * rds_recv_incoming(). This will only be called once after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) * conn_path_connect returns non-zero success. The caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * serializes this with the send and connecting paths (xmit_* and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * conn_*). The transport is responsible for other serialization,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) * including rds_recv_incoming(). This is called in process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * context but should try hard not to block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) struct rds_transport {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) char t_name[TRANSNAMSIZ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) struct list_head t_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) struct module *t_owner;
/*
 * t_mp_capable: when set, the single-path helpers in this header
 * (rds_conn_transition(), rds_conn_state(), rds_conn_up(),
 * rds_conn_connecting()) trigger WARN_ON, since they only look at
 * c_path[0].
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) unsigned int t_prefer_loopback:1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) t_mp_capable:1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) unsigned int t_type; /* NOTE(review): presumably an RDS_TRANS_* id such as RDS_TRANS_LOOP -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
/* Address check and connection / connection-path lifecycle hooks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) int (*laddr_check)(struct net *net, const struct in6_addr *addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) __u32 scope_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) void (*conn_free)(void *data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) int (*conn_path_connect)(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) void (*conn_path_shutdown)(struct rds_conn_path *conn); /* note: the argument is a path, despite being named "conn" */
/* Send-side hooks; the contract of .xmit is spelled out in the kernel-doc above. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) void (*xmit_path_prepare)(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) void (*xmit_path_complete)(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) unsigned int hdr_off, unsigned int sg, unsigned int off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
/* Receive-side hooks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) int (*recv_path)(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) void (*inc_free)(struct rds_incoming *inc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
/* RDMA connection-manager (rdma_cm) event hooks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) struct rdma_cm_event *event, bool isv6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) void (*cm_connect_complete)(struct rds_connection *conn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) struct rdma_cm_event *event);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) unsigned int avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) void (*exit)(void);
/*
 * Memory-region hooks. get_mr returns an opaque pointer.
 * NOTE(review): presumably that pointer is the trans_private later
 * handed to sync_mr/free_mr -- confirm against the callers.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) struct rds_sock *rs, u32 *key_ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) struct rds_connection *conn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) u64 start, u64 length, int need_odp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) void (*sync_mr)(void *trans_private, int direction);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) void (*free_mr)(void *trans_private, int invalidate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) void (*flush_mrs)(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) bool (*t_unloading)(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) u8 (*get_tos_map)(u8 tos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) /* Bind hash table key length. It is the sum of the size of a struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * in6_addr, a scope_id (__u32) and a port (__be16). This macro also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * sizes the rs_bound_key[] array in struct rds_sock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) #define RDS_BOUND_KEY_LEN \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) (sizeof(struct in6_addr) + sizeof(__u32) + sizeof(__be16))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
/*
 * Per-socket RDS state. The generic struct sock is embedded as the rs_sk
 * member; rds_sk_to_rs() recovers the rds_sock from a struct sock with
 * container_of() and rds_rs_to_sk() goes the other way.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) struct rds_sock {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) struct sock rs_sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) u64 rs_user_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) u64 rs_user_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) * bound_addr used for both incoming and outgoing, no INADDR_ANY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) * support.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct rhash_head rs_bound_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) u8 rs_bound_key[RDS_BOUND_KEY_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) struct sockaddr_in6 rs_bound_sin6;
/*
 * Shorthand accessors into rs_bound_sin6. The _v4 form names the last
 * 32-bit word of the IPv6 address.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) #define rs_bound_addr rs_bound_sin6.sin6_addr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) #define rs_bound_addr_v4 rs_bound_sin6.sin6_addr.s6_addr32[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) #define rs_bound_port rs_bound_sin6.sin6_port
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) #define rs_bound_scope_id rs_bound_sin6.sin6_scope_id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) struct in6_addr rs_conn_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) #define rs_conn_addr_v4 rs_conn_addr.s6_addr32[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) __be16 rs_conn_port;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) struct rds_transport *rs_transport;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) * rds_sendmsg caches the conn it used the last time around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) * This helps avoid costly lookups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) struct rds_connection *rs_conn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) /* flag indicating we were congested or not */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) int rs_congested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) /* seen congestion (ENOBUFS) when sending? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) int rs_seen_congestion;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) /* rs_lock protects all these adjacent members, up to the next blank line */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) spinlock_t rs_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) struct list_head rs_send_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) u32 rs_snd_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) int rs_rcv_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) struct list_head rs_notify_queue; /* currently used for failed RDMAs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) * to decide whether the application should be woken up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) * If not set, we use rs_cong_track to find out whether a cong map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) * update arrived.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) uint64_t rs_cong_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) uint64_t rs_cong_notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) struct list_head rs_cong_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) unsigned long rs_cong_track;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) * rs_recv_lock protects the receive queue, and is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) * used to serialize with rds_release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) rwlock_t rs_recv_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) struct list_head rs_recv_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) /* just for stats reporting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) struct list_head rs_item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) /* these have their own lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) spinlock_t rs_rdma_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) struct rb_root rs_rdma_keys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) /* Socket options - in case there will be more */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) unsigned char rs_recverr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) rs_cong_monitor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) u32 rs_hash_initval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) /* Socket receive path trace points */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) u8 rs_rx_traces; /* NOTE(review): presumably the count of valid rs_rx_trace[] entries -- confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) struct rds_msg_zcopy_queue rs_zcookie_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) u8 rs_tos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) return container_of(sk, struct rds_sock, rs_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) return &rs->rs_sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) * to account for overhead. We don't account for overhead, we just apply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) * the number of payload bytes to the specified value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) static inline int rds_sk_sndbuf(struct rds_sock *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) return rds_rs_to_sk(rs)->sk_sndbuf / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) static inline int rds_sk_rcvbuf(struct rds_sock *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) return rds_rs_to_sk(rs)->sk_rcvbuf / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683)
/*
 * RDS statistics record: every member is a 64-bit unsigned value with an
 * s_ prefix, grouped by name into connection, receive, send, page
 * remainder, user copy and congestion entries.
 * NOTE(review): member order may be relied on by whatever exports these
 * values (e.g. a parallel table of names) -- confirm before reordering
 * or inserting fields.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) struct rds_statistics {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) uint64_t s_conn_reset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) uint64_t s_recv_drop_bad_checksum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) uint64_t s_recv_drop_old_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) uint64_t s_recv_drop_no_sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) uint64_t s_recv_drop_dead_sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) uint64_t s_recv_deliver_raced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) uint64_t s_recv_delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) uint64_t s_recv_queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) uint64_t s_recv_immediate_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) uint64_t s_recv_delayed_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) uint64_t s_recv_ack_required;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) uint64_t s_recv_rdma_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) uint64_t s_recv_ping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) uint64_t s_send_queue_empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) uint64_t s_send_queue_full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) uint64_t s_send_lock_contention;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) uint64_t s_send_lock_queue_raced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) uint64_t s_send_immediate_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) uint64_t s_send_delayed_retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) uint64_t s_send_drop_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) uint64_t s_send_ack_required;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) uint64_t s_send_queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) uint64_t s_send_rdma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) uint64_t s_send_rdma_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) uint64_t s_send_pong;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) uint64_t s_page_remainder_hit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) uint64_t s_page_remainder_miss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) uint64_t s_copy_to_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) uint64_t s_copy_from_user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) uint64_t s_cong_update_queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) uint64_t s_cong_update_received;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) uint64_t s_cong_send_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) uint64_t s_cong_send_blocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) uint64_t s_recv_bytes_added_to_socket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) uint64_t s_recv_bytes_removed_from_socket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) uint64_t s_send_stuck_rm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) /* af_rds.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) void rds_sock_addref(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) void rds_sock_put(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) void rds_wake_sk_sleep(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) static inline void __rds_wake_sk_sleep(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) wait_queue_head_t *waitq = sk_sleep(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) if (!sock_flag(sk, SOCK_DEAD) && waitq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) wake_up(waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) extern wait_queue_head_t rds_poll_waitq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) /* bind.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) void rds_remove_bound(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) __u32 scope_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) int rds_bind_lock_init(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) void rds_bind_lock_destroy(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) /* cong.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) int rds_cong_get_maps(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) void rds_cong_add_conn(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) void rds_cong_remove_conn(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) void rds_cong_queue_updates(struct rds_cong_map *map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) void rds_cong_map_updated(struct rds_cong_map *map, uint64_t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) int rds_cong_updated_since(unsigned long *recent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) void rds_cong_add_socket(struct rds_sock *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) void rds_cong_remove_socket(struct rds_sock *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) void rds_cong_exit(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) /* connection.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) extern u32 rds_gen_num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) int rds_conn_init(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) void rds_conn_exit(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) struct rds_connection *rds_conn_create(struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) const struct in6_addr *laddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) const struct in6_addr *faddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) struct rds_transport *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) u8 tos, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) int dev_if);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) struct rds_connection *rds_conn_create_outgoing(struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) const struct in6_addr *laddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) const struct in6_addr *faddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) struct rds_transport *trans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) u8 tos, gfp_t gfp, int dev_if);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) void rds_conn_shutdown(struct rds_conn_path *cpath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) void rds_conn_destroy(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) void rds_conn_drop(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) void rds_conn_connect_if_down(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) void rds_check_all_paths(struct rds_connection *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) void rds_for_each_conn_info(struct socket *sock, unsigned int len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) struct rds_info_iterator *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) struct rds_info_lengths *lens,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) int (*visitor)(struct rds_connection *, void *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) u64 *buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) size_t item_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) __printf(2, 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) #define rds_conn_path_error(cp, fmt...) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) rds_conn_path_transition(struct rds_conn_path *cp, int old, int new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) return atomic_cmpxchg(&cp->cp_state, old, new) == old;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) rds_conn_transition(struct rds_connection *conn, int old, int new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) WARN_ON(conn->c_trans->t_mp_capable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) return rds_conn_path_transition(&conn->c_path[0], old, new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) rds_conn_path_state(struct rds_conn_path *cp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) return atomic_read(&cp->cp_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) rds_conn_state(struct rds_connection *conn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) WARN_ON(conn->c_trans->t_mp_capable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) return rds_conn_path_state(&conn->c_path[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) rds_conn_path_up(struct rds_conn_path *cp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) return atomic_read(&cp->cp_state) == RDS_CONN_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) rds_conn_path_down(struct rds_conn_path *cp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) return atomic_read(&cp->cp_state) == RDS_CONN_DOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) rds_conn_up(struct rds_connection *conn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) WARN_ON(conn->c_trans->t_mp_capable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) return rds_conn_path_up(&conn->c_path[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) rds_conn_path_connecting(struct rds_conn_path *cp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) return atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) rds_conn_connecting(struct rds_connection *conn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) WARN_ON(conn->c_trans->t_mp_capable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) return rds_conn_path_connecting(&conn->c_path[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) /* message.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) bool zcopy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) __be16 dport, u64 seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) int rds_message_add_extension(struct rds_header *hdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) unsigned int type, const void *data, unsigned int len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) int rds_message_next_extension(struct rds_header *hdr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) unsigned int *pos, void *buf, unsigned int *buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) void rds_message_inc_free(struct rds_incoming *inc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) void rds_message_addref(struct rds_message *rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) void rds_message_put(struct rds_message *rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) void rds_message_wait(struct rds_message *rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) void rds_message_unmapped(struct rds_message *rm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) static inline void rds_message_make_checksum(struct rds_header *hdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) hdr->h_csum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) static inline int rds_message_verify_checksum(const struct rds_header *hdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) /* page.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) gfp_t gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) void rds_page_exit(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) /* recv.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) struct in6_addr *saddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) struct in6_addr *saddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) void rds_inc_put(struct rds_incoming *inc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) struct in6_addr *daddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) struct rds_incoming *inc, gfp_t gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) int msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) void rds_clear_recv_queue(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) void rds_inc_info_copy(struct rds_incoming *inc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) struct rds_info_iterator *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) __be32 saddr, __be32 daddr, int flip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) void rds6_inc_info_copy(struct rds_incoming *inc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) struct rds_info_iterator *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) struct in6_addr *saddr, struct in6_addr *daddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) int flip);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) /* send.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) void rds_send_path_reset(struct rds_conn_path *conn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) int rds_send_xmit(struct rds_conn_path *cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) struct sockaddr_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) is_acked_func is_acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) is_acked_func is_acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) void rds_send_ping(struct rds_connection *conn, int cp_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) int rds_send_pong(struct rds_conn_path *cp, __be16 dport);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) /* rdma.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) void rds_rdma_drop_keys(struct rds_sock *rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) int rds_rdma_extra_size(struct rds_rdma_args *args,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) struct rds_iov_vector *iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) struct cmsghdr *cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) struct cmsghdr *cmsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) struct rds_iov_vector *vec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) struct cmsghdr *cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) void rds_rdma_free_op(struct rm_rdma_op *ro);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) void rds_atomic_free_op(struct rm_atomic_op *ao);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) struct cmsghdr *cmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) void __rds_put_mr_final(struct kref *kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) static inline bool rds_destroy_pending(struct rds_connection *conn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) return !check_net(rds_conn_net(conn)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
/*
 * ODP_* constants.  The values are spelled out; they equal the ones the
 * compiler would assign implicitly.
 * NOTE(review): "ODP" presumably stands for on-demand paging - confirm
 * against the users of these constants.
 */
enum {
	ODP_NOT_NEEDED	= 0,
	ODP_ZEROBASED	= 1,
	ODP_VIRTUAL	= 2,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) /* stats.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) #define rds_stats_inc_which(which, member) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) per_cpu(which, get_cpu()).member++; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) put_cpu(); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) #define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) #define rds_stats_add_which(which, member, count) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) per_cpu(which, get_cpu()).member += count; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) put_cpu(); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) #define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
/* Statistics set-up and tear-down; presumably defined in stats.c. */
int rds_stats_init(void);
void rds_stats_exit(void);

/*
 * Takes an info iterator, an array of values, an array of name strings and
 * an element count @nr.
 */
void rds_stats_info_copy(struct rds_info_iterator *iter, uint64_t *values,
			 const char *const *names, size_t nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
/*
 * sysctl.c
 *
 * Set-up/tear-down prototypes plus extern declarations of the
 * rds_sysctl_* variables; the definitions are presumably in sysctl.c.
 */
int rds_sysctl_init(void);
void rds_sysctl_exit(void);

/* unsigned long variables */
extern unsigned long rds_sysctl_sndbuf_min;
extern unsigned long rds_sysctl_sndbuf_default;
extern unsigned long rds_sysctl_sndbuf_max;
extern unsigned long rds_sysctl_reconnect_min_jiffies;
extern unsigned long rds_sysctl_reconnect_max_jiffies;
extern unsigned long rds_sysctl_trace_flags;

/* unsigned int variables */
extern unsigned int rds_sysctl_max_unacked_packets;
extern unsigned int rds_sysctl_max_unacked_bytes;
extern unsigned int rds_sysctl_ping_enable;
extern unsigned int rds_sysctl_trace_level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
/*
 * threads.c
 *
 * Declarations only; the definitions are presumably in threads.c.
 */
int rds_threads_init(void);
void rds_threads_exit(void);

extern struct workqueue_struct *rds_wq;

void rds_queue_reconnect(struct rds_conn_path *cp);

/* Functions with the work-item signature: one struct work_struct argument. */
void rds_connect_worker(struct work_struct *work);
void rds_shutdown_worker(struct work_struct *work);
void rds_send_worker(struct work_struct *work);
void rds_recv_worker(struct work_struct *work);

void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
void rds_connect_complete(struct rds_connection *conn);

/* Compares two IPv6 addresses; the result convention is not visible here. */
int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) /* transport.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) void rds_trans_register(struct rds_transport *trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) void rds_trans_unregister(struct rds_transport *trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) struct rds_transport *rds_trans_get_preferred(struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) const struct in6_addr *addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) __u32 scope_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) void rds_trans_put(struct rds_transport *trans);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) unsigned int avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) struct rds_transport *rds_trans_get(int t_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) int rds_trans_init(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) void rds_trans_exit(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) #endif