^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* XDP sockets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * AF_XDP sockets allows a channel between XDP programs and userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * applications.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright(c) 2018 Intel Corporation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Author(s): Björn Töpel <bjorn.topel@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Magnus Karlsson <magnus.karlsson@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/if_xdp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/sched/task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/socket.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/net.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/netdevice.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/rculist.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <net/xdp_sock_drv.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <net/xdp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include "xsk_queue.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "xdp_umem.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include "xsk.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define TX_BATCH_SIZE 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) pool->cached_need_wakeup |= XDP_WAKEUP_RX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) pool->cached_need_wakeup |= XDP_WAKEUP_TX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) return pool->uses_need_wakeup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) EXPORT_SYMBOL(xsk_uses_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) u16 queue_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) if (queue_id < dev->real_num_rx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) return dev->_rx[queue_id].pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) if (queue_id < dev->real_num_tx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) return dev->_tx[queue_id].pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) EXPORT_SYMBOL(xsk_get_pool_from_qid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) if (queue_id < dev->num_rx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) dev->_rx[queue_id].pool = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) if (queue_id < dev->num_tx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) dev->_tx[queue_id].pool = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /* The buffer pool is stored both in the _rx struct and the _tx struct as we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * not know if the device has more tx queues than rx, or the opposite.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * This might also change during run time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) u16 queue_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) if (queue_id >= max_t(unsigned int,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) dev->real_num_rx_queues,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) dev->real_num_tx_queues))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if (queue_id < dev->real_num_rx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) dev->_rx[queue_id].pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (queue_id < dev->real_num_tx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) dev->_tx[queue_id].pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) void xp_release(struct xdp_buff_xsk *xskb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) offset += xskb->pool->headroom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) if (!xskb->pool->unaligned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) return xskb->orig_addr + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) u64 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) addr = xp_get_handle(xskb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) err = xskq_prod_reserve_desc(xs->rx, addr, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) xs->rx_queue_full++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) xp_release(xskb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) void *from_buf, *to_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) u32 metalen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) if (unlikely(xdp_data_meta_unsupported(from))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) from_buf = from->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) to_buf = to->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) metalen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) from_buf = from->data_meta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) metalen = from->data - from->data_meta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) to_buf = to->data - metalen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) memcpy(to_buf, from_buf, len + metalen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) bool explicit_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) struct xdp_buff *xsk_xdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) xs->rx_dropped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) xsk_xdp = xsk_buff_alloc(xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) if (!xsk_xdp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) xs->rx_dropped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) xsk_copy_xdp(xsk_xdp, xdp, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) err = __xsk_rcv_zc(xs, xsk_xdp, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) xsk_buff_free(xsk_xdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if (explicit_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) xdp_return_buff(xdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) static bool xsk_tx_writeable(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) static bool xsk_is_bound(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if (READ_ONCE(xs->state) == XSK_BOUND) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) /* Matches smp_wmb() in bind(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) bool explicit_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) if (!xsk_is_bound(xs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) len = xdp->data_end - xdp->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) __xsk_rcv_zc(xs, xdp, len) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) __xsk_rcv(xs, xdp, len, explicit_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) static void xsk_flush(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) xskq_prod_submit(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) __xskq_cons_release(xs->pool->fq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) sock_def_readable(&xs->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) spin_lock_bh(&xs->rx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) err = xsk_rcv(xs, xdp, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) xsk_flush(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) spin_unlock_bh(&xs->rx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) err = xsk_rcv(xs, xdp, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) if (!xs->flush_node.prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) list_add(&xs->flush_node, flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) void __xsk_map_flush(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) struct xdp_sock *xs, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) xsk_flush(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) __list_del_clearprev(&xs->flush_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) xskq_prod_submit_n(pool->cq, nb_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) EXPORT_SYMBOL(xsk_tx_completed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) void xsk_tx_release(struct xsk_buff_pool *pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) __xskq_cons_release(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) if (xsk_tx_writeable(xs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) xs->sk.sk_write_space(&xs->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) EXPORT_SYMBOL(xsk_tx_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) xs->tx->queue_empty_descs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) /* This is the backpressure mechanism for the Tx path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) * Reserve space in the completion queue and only proceed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) * if there is space in it. This avoids having to implement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) * any buffering in the Tx path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) if (xskq_prod_reserve_addr(pool->cq, desc->addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) xskq_cons_release(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) EXPORT_SYMBOL(xsk_tx_peek_desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) struct net_device *dev = xs->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) static int xsk_zc_xmit(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) return xsk_wakeup(xs, XDP_WAKEUP_TX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) static void xsk_destruct_skb(struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) struct xdp_sock *xs = xdp_sk(skb->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) spin_lock_irqsave(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) xskq_prod_submit_addr(xs->pool->cq, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) sock_wfree(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) static int xsk_generic_xmit(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) u32 max_batch = TX_BATCH_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) bool sent_frame = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) struct xdp_desc desc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) u32 hr, tr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) if (xs->queue_id >= xs->dev->real_num_tx_queues)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) tr = xs->dev->needed_tailroom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) char *buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) u64 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) if (max_batch-- == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) err = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) len = desc.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) skb = sock_alloc_send_skb(sk, hr + len + tr, 1, &err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) if (unlikely(!skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) skb_reserve(skb, hr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) skb_put(skb, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) addr = desc.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) buffer = xsk_buff_raw_get_data(xs->pool, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) err = skb_store_bits(skb, 0, buffer, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) /* This is the backpressure mechanism for the Tx path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) * Reserve space in the completion queue and only proceed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) * if there is space in it. This avoids having to implement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) * any buffering in the Tx path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) spin_lock_irqsave(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) skb->dev = xs->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) skb->priority = sk->sk_priority;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) skb->mark = sk->sk_mark;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) skb->destructor = xsk_destruct_skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) err = __dev_direct_xmit(skb, xs->queue_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) if (err == NETDEV_TX_BUSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) /* Tell user-space to retry the send */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) skb->destructor = sock_wfree;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) spin_lock_irqsave(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) xskq_prod_cancel(xs->pool->cq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) /* Free skb without triggering the perf drop trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) consume_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) err = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) xskq_cons_release(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) /* Ignore NET_XMIT_CN as packet might have been sent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) if (err == NET_XMIT_DROP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) /* SKB completed but not sent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) err = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) sent_frame = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) xs->tx->queue_empty_descs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) if (sent_frame)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) if (xsk_tx_writeable(xs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) static int __xsk_sendmsg(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) if (unlikely(!(xs->dev->flags & IFF_UP)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) return -ENETDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) if (unlikely(!xs->tx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) if (unlikely(!xsk_is_bound(xs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) if (unlikely(need_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) return __xsk_sendmsg(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) static __poll_t xsk_poll(struct file *file, struct socket *sock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) struct poll_table_struct *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) __poll_t mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) struct xsk_buff_pool *pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) sock_poll_wait(file, sock, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) if (unlikely(!xsk_is_bound(xs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) pool = xs->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) if (pool->cached_need_wakeup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) if (xs->zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) xsk_wakeup(xs, pool->cached_need_wakeup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) /* Poll needs to drive Tx also in copy mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) __xsk_sendmsg(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) if (xs->rx && !xskq_prod_is_empty(xs->rx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) mask |= EPOLLIN | EPOLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) if (xs->tx && xsk_tx_writeable(xs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) mask |= EPOLLOUT | EPOLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) bool umem_queue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) struct xsk_queue *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) if (entries == 0 || *queue || !is_power_of_2(entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) q = xskq_create(entries, umem_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) /* Make sure queue is ready before it can be seen by others */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) WRITE_ONCE(*queue, q);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) static void xsk_unbind_dev(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) struct net_device *dev = xs->dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) if (xs->state != XSK_BOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) WRITE_ONCE(xs->state, XSK_UNBOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) /* Wait for driver to stop using the xdp socket. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) xp_del_xsk(xs->pool, xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) xs->dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) synchronize_net();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) dev_put(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) struct xdp_sock ***map_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) struct xsk_map *map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) struct xsk_map_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) *map_entry = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) spin_lock_bh(&xs->map_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) if (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) WARN_ON(xsk_map_inc(node->map));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) map = node->map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) *map_entry = node->map_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) spin_unlock_bh(&xs->map_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) static void xsk_delete_from_maps(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) /* This function removes the current XDP socket from all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) * maps it resides in. We need to take extra care here, due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * the two locks involved. Each map has a lock synchronizing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * updates to the entries, and each socket has a lock that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) * synchronizes access to the list of maps (map_list). For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) * deadlock avoidance the locks need to be taken in the order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * "map lock"->"socket map list lock". We start off by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) * accessing the socket map list, and take a reference to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) * map to guarantee existence between the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) * calls. Then we ask the map to remove the socket, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) * tries to remove the socket from the map. Note that there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) * might be updates to the map between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct xdp_sock **map_entry = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) struct xsk_map *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) xsk_map_try_sock_delete(map, xs, map_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) xsk_map_put(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) static int xsk_release(struct socket *sock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) struct net *net;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) if (!sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) mutex_lock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) sk_del_node_init_rcu(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) mutex_unlock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) sock_prot_inuse_add(net, sk->sk_prot, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) xsk_delete_from_maps(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) xsk_unbind_dev(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) xskq_destroy(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) xskq_destroy(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) xskq_destroy(xs->fq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) xskq_destroy(xs->cq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) sock_orphan(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) sock->sk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) sk_refcnt_debug_release(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) sock_put(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) static struct socket *xsk_lookup_xsk_from_fd(int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) sock = sockfd_lookup(fd, &err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) if (!sock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) return ERR_PTR(-ENOTSOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (sock->sk->sk_family != PF_XDP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) return ERR_PTR(-ENOPROTOOPT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) return sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) static bool xsk_validate_queues(struct xdp_sock *xs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) return xs->fq_tmp && xs->cq_tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) struct net_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) u32 flags, qid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (addr_len < sizeof(struct sockaddr_xdp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) if (sxdp->sxdp_family != AF_XDP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) flags = sxdp->sxdp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) XDP_USE_NEED_WAKEUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) rtnl_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) if (xs->state != XSK_READY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) err = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) goto out_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) if (!dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) err = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) goto out_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) if (!xs->rx && !xs->tx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) qid = sxdp->sxdp_queue_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) if (flags & XDP_SHARED_UMEM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) struct xdp_sock *umem_xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) (flags & XDP_USE_NEED_WAKEUP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) /* Cannot specify flags for shared sockets. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) if (xs->umem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) /* We have already our own. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) if (IS_ERR(sock)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) err = PTR_ERR(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) umem_xs = xdp_sk(sock->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) if (!xsk_is_bound(umem_xs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) err = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) /* Share the umem with another socket on another qid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) * and/or device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) xs->pool = xp_create_and_assign_umem(xs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) umem_xs->umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) if (!xs->pool) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) dev, qid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) xp_destroy(xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) xs->pool = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) /* Share the buffer pool with the other socket. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) if (xs->fq_tmp || xs->cq_tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /* Do not allow setting your own fq or cq. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) xp_get_pool(umem_xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) xs->pool = umem_xs->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) xdp_get_umem(umem_xs->umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) WRITE_ONCE(xs->umem, umem_xs->umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) sockfd_put(sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) } else if (!xs->umem || !xsk_validate_queues(xs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) /* This xsk has its own umem. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) xs->pool = xp_create_and_assign_umem(xs, xs->umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) if (!xs->pool) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) err = xp_assign_dev(xs->pool, dev, qid, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) xp_destroy(xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) xs->pool = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) xs->fq_tmp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) xs->cq_tmp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) xs->dev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) xs->zc = xs->umem->zc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) xs->queue_id = qid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) xp_add_xsk(xs->pool, xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) dev_put(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) /* Matches smp_rmb() in bind() for shared umem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * sockets, and xsk_is_bound().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) WRITE_ONCE(xs->state, XSK_BOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) out_release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) rtnl_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) struct xdp_umem_reg_v1 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) __u64 addr; /* Start of packet data area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) __u64 len; /* Length of packet data area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) __u32 chunk_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) __u32 headroom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) static int xsk_setsockopt(struct socket *sock, int level, int optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) sockptr_t optval, unsigned int optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (level != SOL_XDP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) case XDP_RX_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) case XDP_TX_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) struct xsk_queue **q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) int entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (optlen < sizeof(entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) if (copy_from_sockptr(&entries, optval, sizeof(entries)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (xs->state != XSK_READY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) err = xsk_init_queue(entries, q, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (!err && optname == XDP_TX_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) /* Tx needs to be explicitly woken up the first time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) case XDP_UMEM_REG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) size_t mr_size = sizeof(struct xdp_umem_reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) struct xdp_umem_reg mr = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) struct xdp_umem *umem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
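		/* Accept both the legacy v1 layout and the current, larger
		 * struct xdp_umem_reg; only copy as many bytes as the caller
		 * provided.
		 */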
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (optlen < sizeof(struct xdp_umem_reg_v1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) else if (optlen < sizeof(mr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) mr_size = sizeof(struct xdp_umem_reg_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (copy_from_sockptr(&mr, optval, mr_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (xs->state != XSK_READY || xs->umem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) umem = xdp_umem_create(&mr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (IS_ERR(umem)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) return PTR_ERR(umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /* Make sure umem is ready before it can be seen by others */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) WRITE_ONCE(xs->umem, umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) case XDP_UMEM_FILL_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) case XDP_UMEM_COMPLETION_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) struct xsk_queue **q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) int entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
		if (optlen < sizeof(entries))
			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (copy_from_sockptr(&entries, optval, sizeof(entries)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) if (xs->state != XSK_READY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) &xs->cq_tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) err = xsk_init_queue(entries, q, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
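/* Fill in the offsets, within each mmap()ed ring, of the producer and
 * consumer pointers and of the descriptor array. Reported to userspace
 * via the XDP_MMAP_OFFSETS getsockopt below.
 */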
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) ring->desc = offsetof(struct xdp_rxtx_ring, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) ring->desc = offsetof(struct xdp_umem_ring, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
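/* Legacy XDP_STATISTICS layout, from before the ring-full and
 * empty-descriptor counters were added. Still honoured for callers
 * that pass a short optlen.
 */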
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) struct xdp_statistics_v1 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) __u64 rx_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) __u64 rx_invalid_descs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) __u64 tx_invalid_descs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934)
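/* Handle SOL_XDP getsockopt(): statistics, mmap offsets for the rings,
 * and option flags such as zero-copy mode.
 */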
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) static int xsk_getsockopt(struct socket *sock, int level, int optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) char __user *optval, int __user *optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) if (level != SOL_XDP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) case XDP_STATISTICS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) struct xdp_statistics stats = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) bool extra_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) size_t stats_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) if (len < sizeof(struct xdp_statistics_v1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) } else if (len < sizeof(stats)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) extra_stats = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) stats_size = sizeof(struct xdp_statistics_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) stats_size = sizeof(stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) stats.rx_dropped = xs->rx_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (extra_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) stats.rx_ring_full = xs->rx_queue_full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) stats.rx_fill_ring_empty_descs =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) stats.rx_dropped += xs->rx_queue_full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) if (copy_to_user(optval, &stats, stats_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) if (put_user(stats_size, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) case XDP_MMAP_OFFSETS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) struct xdp_mmap_offsets off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) struct xdp_mmap_offsets_v1 off_v1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) bool flags_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) void *to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (len < sizeof(off_v1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) else if (len < sizeof(off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) flags_supported = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) if (flags_supported) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) /* xdp_ring_offset is identical to xdp_ring_offset_v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * except for the flags field added to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) &off.rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) &off.tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) &off.fr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) &off.cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) off.rx.flags = offsetof(struct xdp_rxtx_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) off.tx.flags = offsetof(struct xdp_rxtx_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) off.fr.flags = offsetof(struct xdp_umem_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) off.cr.flags = offsetof(struct xdp_umem_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) len = sizeof(off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) to_copy = &off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) xsk_enter_rxtx_offsets(&off_v1.rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) xsk_enter_rxtx_offsets(&off_v1.tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) xsk_enter_umem_offsets(&off_v1.fr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) xsk_enter_umem_offsets(&off_v1.cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) len = sizeof(off_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) to_copy = &off_v1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (copy_to_user(optval, to_copy, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) case XDP_OPTIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) struct xdp_options opts = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) if (len < sizeof(opts))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) if (xs->zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) opts.flags |= XDP_OPTIONS_ZEROCOPY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) len = sizeof(opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) if (copy_to_user(optval, &opts, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065)
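/* mmap() one of the four rings into the caller's address space. The
 * page offset selects the ring: XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING,
 * XDP_UMEM_PGOFF_FILL_RING or XDP_UMEM_PGOFF_COMPLETION_RING. The
 * corresponding queue must have been created with setsockopt() first,
 * and mapping is only permitted before the socket has been bound.
 */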
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) static int xsk_mmap(struct file *file, struct socket *sock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) unsigned long size = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) struct xdp_sock *xs = xdp_sk(sock->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) struct xsk_queue *q = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) struct page *qpg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (READ_ONCE(xs->state) != XSK_READY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (offset == XDP_PGOFF_RX_RING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) q = READ_ONCE(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) } else if (offset == XDP_PGOFF_TX_RING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) q = READ_ONCE(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) /* Matches the smp_wmb() in XDP_UMEM_REG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) if (offset == XDP_UMEM_PGOFF_FILL_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) q = READ_ONCE(xs->fq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) q = READ_ONCE(xs->cq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) /* Matches the smp_wmb() in xsk_init_queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) qpg = virt_to_head_page(q->ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) if (size > page_size(qpg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) return remap_pfn_range(vma, vma->vm_start, pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) size, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)
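/* Netdevice notifier: when a device is unregistered, report ENETDOWN on
 * every socket bound to it and drop the device references held by the
 * socket and its buffer pool.
 */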
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) static int xsk_notifier(struct notifier_block *this,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) unsigned long msg, void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) struct net_device *dev = netdev_notifier_info_to_dev(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) struct net *net = dev_net(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) struct sock *sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) switch (msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) case NETDEV_UNREGISTER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) mutex_lock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) sk_for_each(sk, &net->xdp.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) if (xs->dev == dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) sk->sk_err = ENETDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) xsk_unbind_dev(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) /* Clear device references. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) xp_clear_dev(xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) mutex_unlock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) static struct proto xsk_proto = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) .name = "XDP",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) .obj_size = sizeof(struct xdp_sock),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) static const struct proto_ops xsk_proto_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) .family = PF_XDP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) .release = xsk_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) .bind = xsk_bind,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) .connect = sock_no_connect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) .socketpair = sock_no_socketpair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) .accept = sock_no_accept,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) .getname = sock_no_getname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) .poll = xsk_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) .ioctl = sock_no_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) .listen = sock_no_listen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) .shutdown = sock_no_shutdown,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) .setsockopt = xsk_setsockopt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) .getsockopt = xsk_getsockopt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) .sendmsg = xsk_sendmsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) .recvmsg = sock_no_recvmsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) .mmap = xsk_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) .sendpage = sock_no_sendpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
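/* Final per-socket cleanup, run when the last reference to the sock is
 * dropped: release the socket's reference on the buffer pool, or put the
 * umem directly if no pool was ever created.
 */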
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) static void xsk_destruct(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) if (!xp_put_pool(xs->pool))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) xdp_put_umem(xs->umem, !xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) sk_refcnt_debug_dec(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
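/* Create a new AF_XDP socket. Requires CAP_NET_RAW in the netns, a
 * SOCK_RAW socket type and protocol 0. Initializes the per-socket state
 * and links the socket into the per-netns list walked by the netdev
 * notifier above.
 */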
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static int xsk_create(struct net *net, struct socket *sock, int protocol,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) int kern)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) struct sock *sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (!ns_capable(net->user_ns, CAP_NET_RAW))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (sock->type != SOCK_RAW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) return -ESOCKTNOSUPPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if (protocol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return -EPROTONOSUPPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) sock->state = SS_UNCONNECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (!sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) sock->ops = &xsk_proto_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) sock_init_data(sock, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) sk->sk_family = PF_XDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) sk->sk_destruct = xsk_destruct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) sk_refcnt_debug_inc(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) sock_set_flag(sk, SOCK_RCU_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) xs->state = XSK_READY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) mutex_init(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) spin_lock_init(&xs->rx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) INIT_LIST_HEAD(&xs->map_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) spin_lock_init(&xs->map_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) mutex_lock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) sk_add_node_rcu(sk, &net->xdp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) mutex_unlock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) sock_prot_inuse_add(net, &xsk_proto, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) static const struct net_proto_family xsk_family_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) .family = PF_XDP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) .create = xsk_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) static struct notifier_block xsk_netdev_notifier = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) .notifier_call = xsk_notifier,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) static int __net_init xsk_net_init(struct net *net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) mutex_init(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) INIT_HLIST_HEAD(&net->xdp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static void __net_exit xsk_net_exit(struct net *net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) static struct pernet_operations xsk_net_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) .init = xsk_net_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) .exit = xsk_net_exit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
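/* Module init: register the protocol, the PF_XDP socket family, the
 * pernet ops and the netdev notifier, and initialize the per-cpu flush
 * lists used when XDP programs redirect into an XSKMAP.
 */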
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static int __init xsk_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) int err, cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) err = proto_register(&xsk_proto, 0 /* no slab */);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) err = sock_register(&xsk_family_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) goto out_proto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) err = register_pernet_subsys(&xsk_net_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) goto out_sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) err = register_netdevice_notifier(&xsk_netdev_notifier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) goto out_pernet;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) out_pernet:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) unregister_pernet_subsys(&xsk_net_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) out_sk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) sock_unregister(PF_XDP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) out_proto:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) proto_unregister(&xsk_proto);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) fs_initcall(xsk_init);