Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards
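
The listing below is this tree's copy of net/xdp/xsk.c, the AF_XDP socket core.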

// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */
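
/* Illustrative userspace sketch (not part of this file): the setup
 * sequence that the code below services. Assumes <sys/socket.h> and
 * <linux/if_xdp.h>; umem_area/umem_len/ifindex are placeholders and
 * error handling is omitted.
 */
#if 0
static int xsk_example_setup(void *umem_area, __u64 umem_len, int ifindex)
{
	int fd = socket(AF_XDP, SOCK_RAW, 0);
	int descs = 2048;	/* ring sizes must be a power of two */
	struct xdp_umem_reg mr = {
		.addr = (__u64)(unsigned long)umem_area,
		.len = umem_len,
		.chunk_size = 2048,
		.headroom = 0,
	};
	struct sockaddr_xdp sxdp = {
		.sxdp_family = AF_XDP,
		.sxdp_ifindex = ifindex,
		.sxdp_queue_id = 0,
	};

	/* Register packet memory and create the four rings; all of
	 * these land in xsk_setsockopt() below.
	 */
	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	setsockopt(fd, SOL_XDP, XDP_RX_RING, &descs, sizeof(descs));
	setsockopt(fd, SOL_XDP, XDP_TX_RING, &descs, sizeof(descs));
	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &descs, sizeof(descs));
	setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &descs, sizeof(descs));

	/* Attach to one device queue; handled by xsk_bind() below. */
	return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}
#endif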

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"

#define TX_BATCH_SIZE 16

static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);

void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
		return;

	pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup |= XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_set_rx_need_wakeup);

void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup |= XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_set_tx_need_wakeup);

void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
		return;

	pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);

void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);

bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
	return pool->uses_need_wakeup;
}
EXPORT_SYMBOL(xsk_uses_need_wakeup);
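
/* Illustrative sketch (not from this tree): how a zero-copy driver's
 * NAPI poll typically uses the wakeup helpers above. The drv_*
 * functions and types are placeholders for driver-specific code.
 */
#if 0
static int drv_napi_poll_rx(struct drv_ring *ring, int budget)
{
	int done = drv_process_rx(ring, budget);

	if (xsk_uses_need_wakeup(ring->pool)) {
		/* Fill queue ran dry: ask userspace to produce more
		 * buffers; otherwise let it skip the syscall.
		 */
		if (drv_fill_queue_empty(ring))
			xsk_set_rx_need_wakeup(ring->pool);
		else
			xsk_clear_rx_need_wakeup(ring->pool);
	}
	return done;
}
#endif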

struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
					    u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].pool;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].pool;

	return NULL;
}
EXPORT_SYMBOL(xsk_get_pool_from_qid);

void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->num_rx_queues)
		dev->_rx[queue_id].pool = NULL;
	if (queue_id < dev->num_tx_queues)
		dev->_tx[queue_id].pool = NULL;
}

/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
			u16 queue_id)
{
	if (queue_id >= max_t(unsigned int,
			      dev->real_num_rx_queues,
			      dev->real_num_tx_queues))
		return -EINVAL;

	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].pool = pool;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].pool = pool;

	return 0;
}

void xp_release(struct xdp_buff_xsk *xskb)
{
	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
}

static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
{
	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;

	offset += xskb->pool->headroom;
	if (!xskb->pool->unaligned)
		return xskb->orig_addr + offset;
	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
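
/* Worked example for xp_get_handle() in unaligned mode: assuming the
 * UAPI value XSK_UNALIGNED_BUF_OFFSET_SHIFT == 48, a buffer at
 * orig_addr 0x4000 whose data starts 0x100 bytes in is returned as
 * 0x4000 + (0x100 << 48) = 0x0100000000004000, i.e. the base address
 * lives in the low 48 bits and the data offset in the top 16.
 */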

static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
	u64 addr;
	int err;

	addr = xp_get_handle(xskb);
	err = xskq_prod_reserve_desc(xs->rx, addr, len);
	if (err) {
		xs->rx_queue_full++;
		return err;
	}

	xp_release(xskb);
	return 0;
}

static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
{
	void *from_buf, *to_buf;
	u32 metalen;

	if (unlikely(xdp_data_meta_unsupported(from))) {
		from_buf = from->data;
		to_buf = to->data;
		metalen = 0;
	} else {
		from_buf = from->data_meta;
		metalen = from->data - from->data_meta;
		to_buf = to->data - metalen;
	}

	memcpy(to_buf, from_buf, len + metalen);
}

static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
		     bool explicit_free)
{
	struct xdp_buff *xsk_xdp;
	int err;

	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	xsk_xdp = xsk_buff_alloc(xs->pool);
	if (!xsk_xdp) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	xsk_copy_xdp(xsk_xdp, xdp, len);
	err = __xsk_rcv_zc(xs, xsk_xdp, len);
	if (err) {
		xsk_buff_free(xsk_xdp);
		return err;
	}
	if (explicit_free)
		xdp_return_buff(xdp);
	return 0;
}

static bool xsk_tx_writeable(struct xdp_sock *xs)
{
	if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
		return false;

	return true;
}

static bool xsk_is_bound(struct xdp_sock *xs)
{
	if (READ_ONCE(xs->state) == XSK_BOUND) {
		/* Matches smp_wmb() in bind(). */
		smp_rmb();
		return true;
	}
	return false;
}

static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
		   bool explicit_free)
{
	u32 len;

	if (!xsk_is_bound(xs))
		return -EINVAL;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	len = xdp->data_end - xdp->data;

	return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
		__xsk_rcv_zc(xs, xdp, len) :
		__xsk_rcv(xs, xdp, len, explicit_free);
}

static void xsk_flush(struct xdp_sock *xs)
{
	xskq_prod_submit(xs->rx);
	__xskq_cons_release(xs->pool->fq);
	sock_def_readable(&xs->sk);
}

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	int err;

	spin_lock_bh(&xs->rx_lock);
	err = xsk_rcv(xs, xdp, false);
	xsk_flush(xs);
	spin_unlock_bh(&xs->rx_lock);
	return err;
}

int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	int err;

	err = xsk_rcv(xs, xdp, true);
	if (err)
		return err;

	if (!xs->flush_node.prev)
		list_add(&xs->flush_node, flush_list);

	return 0;
}

void __xsk_map_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	struct xdp_sock *xs, *tmp;

	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
		xsk_flush(xs);
		__list_del_clearprev(&xs->flush_node);
	}
}
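
/* Illustrative call flow (not part of this file): a BPF program's
 * XDP_REDIRECT into an XSKMAP reaches __xsk_map_redirect(), and the
 * driver flushes once per NAPI poll. Sketch with placeholder drv_*
 * names; xdp_do_redirect()/xdp_do_flush() are the real entry points.
 */
#if 0
while (budget--) {
	struct xdp_buff *xdp = drv_next_rx_buff(ring);

	/* May queue xs on xskmap_flush_list via __xsk_map_redirect() */
	xdp_do_redirect(ring->netdev, xdp, xdp_prog);
}
xdp_do_flush();	/* drains the list through __xsk_map_flush() */
#endif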

void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
	xskq_prod_submit_n(pool->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_tx_completed);

void xsk_tx_release(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		__xskq_cons_release(xs->tx);
		if (xsk_tx_writeable(xs))
			xs->sk.sk_write_space(&xs->sk);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_tx_release);

bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
			xs->tx->queue_empty_descs++;
			continue;
		}

		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		if (xskq_prod_reserve_addr(pool->cq, desc->addr))
			goto out;

		xskq_cons_release(xs->tx);
		rcu_read_unlock();
		return true;
	}

out:
	rcu_read_unlock();
	return false;
}
EXPORT_SYMBOL(xsk_tx_peek_desc);
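
/* Illustrative sketch (not from this tree): the driver-side Tx loop
 * that pairs with xsk_tx_peek_desc()/xsk_tx_release(), with drv_*
 * placeholders for the real descriptor and doorbell handling.
 */
#if 0
static void drv_xsk_xmit(struct drv_ring *ring, int budget)
{
	struct xdp_desc desc;
	bool sent = false;

	while (budget-- && xsk_tx_peek_desc(ring->pool, &desc)) {
		dma_addr_t dma = xsk_buff_raw_get_dma(ring->pool, desc.addr);

		drv_post_tx_desc(ring, dma, desc.len);
		sent = true;
	}
	if (sent) {
		drv_ring_doorbell(ring);
		xsk_tx_release(ring->pool);
	}
	/* On the Tx completion interrupt, xsk_tx_completed(pool, n)
	 * hands the reserved cq entries back to userspace.
	 */
}
#endif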

static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
	struct net_device *dev = xs->dev;
	int err;

	rcu_read_lock();
	err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
	rcu_read_unlock();

	return err;
}

static int xsk_zc_xmit(struct xdp_sock *xs)
{
	return xsk_wakeup(xs, XDP_WAKEUP_TX);
}

static void xsk_destruct_skb(struct sk_buff *skb)
{
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);
	unsigned long flags;

	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	xskq_prod_submit_addr(xs->pool->cq, addr);
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

	sock_wfree(skb);
}

static int xsk_generic_xmit(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);
	u32 max_batch = TX_BATCH_SIZE;
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	unsigned long flags;
	int err = 0;
	u32 hr, tr;

	mutex_lock(&xs->mutex);

	if (xs->queue_id >= xs->dev->real_num_tx_queues)
		goto out;

	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
	tr = xs->dev->needed_tailroom;

	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
		char *buffer;
		u64 addr;
		u32 len;

		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		len = desc.len;
		skb = sock_alloc_send_skb(sk, hr + len + tr, 1, &err);
		if (unlikely(!skb))
			goto out;

		skb_reserve(skb, hr);
		skb_put(skb, len);

		addr = desc.addr;
		buffer = xsk_buff_raw_get_data(xs->pool, addr);
		err = skb_store_bits(skb, 0, buffer, len);
		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		spin_lock_irqsave(&xs->pool->cq_lock, flags);
		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
			kfree_skb(skb);
			goto out;
		}
		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

		skb->dev = xs->dev;
		skb->priority = sk->sk_priority;
		skb->mark = sk->sk_mark;
		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
		skb->destructor = xsk_destruct_skb;

		err = __dev_direct_xmit(skb, xs->queue_id);
		if (err == NETDEV_TX_BUSY) {
			/* Tell user-space to retry the send */
			skb->destructor = sock_wfree;
			spin_lock_irqsave(&xs->pool->cq_lock, flags);
			xskq_prod_cancel(xs->pool->cq);
			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
			/* Free skb without triggering the perf drop trace */
			consume_skb(skb);
			err = -EAGAIN;
			goto out;
		}

		xskq_cons_release(xs->tx);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP) {
			/* SKB completed but not sent */
			err = -EBUSY;
			goto out;
		}

		sent_frame = true;
	}

	xs->tx->queue_empty_descs++;

out:
	if (sent_frame)
		if (xsk_tx_writeable(xs))
			sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

static int __xsk_sendmsg(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);

	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;
	if (unlikely(!xs->tx))
		return -ENOBUFS;

	return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
}

static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (unlikely(!xsk_is_bound(xs)))
		return -ENXIO;
	if (unlikely(need_wait))
		return -EOPNOTSUPP;

	return __xsk_sendmsg(sk);
}
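
/* Illustrative userspace sketch: blocking sends are rejected above,
 * so the canonical Tx kick after filling the Tx ring is a zero-length
 * non-blocking send on the bound socket (xsk_fd is a placeholder).
 */
#if 0
sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
#endif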

static __poll_t xsk_poll(struct file *file, struct socket *sock,
			 struct poll_table_struct *wait)
{
	__poll_t mask = 0;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xsk_buff_pool *pool;

	sock_poll_wait(file, sock, wait);

	if (unlikely(!xsk_is_bound(xs)))
		return mask;

	pool = xs->pool;

	if (pool->cached_need_wakeup) {
		if (xs->zc)
			xsk_wakeup(xs, pool->cached_need_wakeup);
		else
			/* Poll needs to drive Tx also in copy mode */
			__xsk_sendmsg(sk);
	}

	if (xs->rx && !xskq_prod_is_empty(xs->rx))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (xs->tx && xsk_tx_writeable(xs))
		mask |= EPOLLOUT | EPOLLWRNORM;

	return mask;
}
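
/* Illustrative userspace sketch: with XDP_USE_NEED_WAKEUP, an
 * application checks the ring flags and only falls back to poll(),
 * which also drives copy-mode Tx as noted above (xsk_fd and
 * timeout_ms are placeholders).
 */
#if 0
struct pollfd pfd = { .fd = xsk_fd, .events = POLLIN | POLLOUT };

poll(&pfd, 1, timeout_ms);
#endif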

static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	WRITE_ONCE(*queue, q);
	return 0;
}

static void xsk_unbind_dev(struct xdp_sock *xs)
{
	struct net_device *dev = xs->dev;

	if (xs->state != XSK_BOUND)
		return;
	WRITE_ONCE(xs->state, XSK_UNBOUND);

	/* Wait for driver to stop using the xdp socket. */
	xp_del_xsk(xs->pool, xs);
	xs->dev = NULL;
	synchronize_net();
	dev_put(dev);
}

static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
					      struct xdp_sock ***map_entry)
{
	struct xsk_map *map = NULL;
	struct xsk_map_node *node;

	*map_entry = NULL;

	spin_lock_bh(&xs->map_list_lock);
	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
					node);
	if (node) {
		WARN_ON(xsk_map_inc(node->map));
		map = node->map;
		*map_entry = node->map_entry;
	}
	spin_unlock_bh(&xs->map_list_lock);
	return map;
}

static void xsk_delete_from_maps(struct xdp_sock *xs)
{
	/* This function removes the current XDP socket from all the
	 * maps it resides in. We need to take extra care here, due to
	 * the two locks involved. Each map has a lock synchronizing
	 * updates to the entries, and each socket has a lock that
	 * synchronizes access to the list of maps (map_list). For
	 * deadlock avoidance the locks need to be taken in the order
	 * "map lock"->"socket map list lock". We start off by
	 * accessing the socket map list, and take a reference to the
	 * map to guarantee existence between the
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
	 * calls. Then we ask the map to remove the socket, which
	 * tries to remove the socket from the map. Note that there
	 * might be updates to the map between
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
	 */
	struct xdp_sock **map_entry = NULL;
	struct xsk_map *map;

	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
		xsk_map_try_sock_delete(map, xs, map_entry);
		xsk_map_put(map);
	}
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	mutex_lock(&net->xdp.lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->xdp.lock);

	local_bh_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	local_bh_enable();

	xsk_delete_from_maps(xs);
	mutex_lock(&xs->mutex);
	xsk_unbind_dev(xs);
	mutex_unlock(&xs->mutex);

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xskq_destroy(xs->fq_tmp);
	xskq_destroy(xs->cq_tmp);

	sock_orphan(sk);
	sock->sk = NULL;

	sk_refcnt_debug_release(sk);
	sock_put(sk);

	return 0;
}

static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

static bool xsk_validate_queues(struct xdp_sock *xs)
{
	return xs->fq_tmp && xs->cq_tmp;
}

static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	flags = sxdp->sxdp_flags;
	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
		      XDP_USE_NEED_WAKEUP))
		return -EINVAL;

	rtnl_lock();
	mutex_lock(&xs->mutex);
	if (xs->state != XSK_READY) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
		    (flags & XDP_USE_NEED_WAKEUP)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
		if (IS_ERR(sock)) {
			err = PTR_ERR(sock);
			goto out_unlock;
		}

		umem_xs = xdp_sk(sock->sk);
		if (!xsk_is_bound(umem_xs)) {
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
		}

		if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
			/* Share the umem with another socket on another qid
			 * and/or device.
			 */
			xs->pool = xp_create_and_assign_umem(xs,
							     umem_xs->umem);
			if (!xs->pool) {
				err = -ENOMEM;
				sockfd_put(sock);
				goto out_unlock;
			}

			err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
						   dev, qid);
			if (err) {
				xp_destroy(xs->pool);
				xs->pool = NULL;
				sockfd_put(sock);
				goto out_unlock;
			}
		} else {
			/* Share the buffer pool with the other socket. */
			if (xs->fq_tmp || xs->cq_tmp) {
				/* Do not allow setting your own fq or cq. */
				err = -EINVAL;
				sockfd_put(sock);
				goto out_unlock;
			}

			xp_get_pool(umem_xs->pool);
			xs->pool = umem_xs->pool;
		}

		xdp_get_umem(umem_xs->umem);
		WRITE_ONCE(xs->umem, umem_xs->umem);
		sockfd_put(sock);
	} else if (!xs->umem || !xsk_validate_queues(xs)) {
		err = -EINVAL;
		goto out_unlock;
	} else {
		/* This xsk has its own umem. */
		xs->pool = xp_create_and_assign_umem(xs, xs->umem);
		if (!xs->pool) {
			err = -ENOMEM;
			goto out_unlock;
		}

		err = xp_assign_dev(xs->pool, dev, qid, flags);
		if (err) {
			xp_destroy(xs->pool);
			xs->pool = NULL;
			goto out_unlock;
		}
	}

	/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
	xs->fq_tmp = NULL;
	xs->cq_tmp = NULL;

	xs->dev = dev;
	xs->zc = xs->umem->zc;
	xs->queue_id = qid;
	xp_add_xsk(xs->pool, xs);

out_unlock:
	if (err) {
		dev_put(dev);
	} else {
		/* Matches smp_rmb() in bind() for shared umem
		 * sockets, and xsk_is_bound().
		 */
		smp_wmb();
		WRITE_ONCE(xs->state, XSK_BOUND);
	}
out_release:
	mutex_unlock(&xs->mutex);
	rtnl_unlock();
	return err;
}
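
/* Illustrative userspace sketch for the XDP_SHARED_UMEM branch of
 * xsk_bind() above: a second socket reuses an already-bound socket's
 * umem. On the same device/queue it must not bring its own fq/cq and
 * may not set other flags (ifindex/queue_id/fds are placeholders).
 */
#if 0
struct sockaddr_xdp sxdp = {
	.sxdp_family = AF_XDP,
	.sxdp_ifindex = ifindex,
	.sxdp_queue_id = queue_id,
	.sxdp_flags = XDP_SHARED_UMEM,
	.sxdp_shared_umem_fd = first_xsk_fd,
};

bind(second_xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
#endif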
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) struct xdp_umem_reg_v1 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	__u64 addr; /* Start of packet data area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	__u64 len; /* Length of packet data area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	__u32 chunk_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	__u32 headroom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) };
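
/* For comparison (informational): the full uapi struct xdp_umem_reg in this
 * tree appends a __u32 flags member (e.g. XDP_UMEM_UNALIGNED_CHUNK_FLAG).
 * xsk_setsockopt() below keys off optlen, so binaries built against either
 * layout are accepted.
 */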
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) static int xsk_setsockopt(struct socket *sock, int level, int optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 			  sockptr_t optval, unsigned int optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	if (level != SOL_XDP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 		return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	case XDP_RX_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	case XDP_TX_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 		struct xsk_queue **q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		int entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		if (optlen < sizeof(entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 		if (xs->state != XSK_READY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 			mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 			return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 		err = xsk_init_queue(entries, q, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 		if (!err && optname == XDP_TX_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 			/* Tx needs to be explicitly woken up the first time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	}
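	/* Illustrative userspace call for the two ring cases above; entries
	 * must be a nonzero power of two or xsk_init_queue() returns -EINVAL:
	 *
	 *	int entries = 2048;
	 *	setsockopt(fd, SOL_XDP, XDP_RX_RING, &entries, sizeof(entries));
	 */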
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	case XDP_UMEM_REG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		size_t mr_size = sizeof(struct xdp_umem_reg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		struct xdp_umem_reg mr = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		struct xdp_umem *umem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		if (optlen < sizeof(struct xdp_umem_reg_v1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		else if (optlen < sizeof(mr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 			mr_size = sizeof(struct xdp_umem_reg_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		if (copy_from_sockptr(&mr, optval, mr_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		if (xs->state != XSK_READY || xs->umem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 			mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 			return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		umem = xdp_umem_create(&mr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		if (IS_ERR(umem)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 			mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 			return PTR_ERR(umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		/* Make sure umem is ready before it can be seen by others */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		WRITE_ONCE(xs->umem, umem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	}
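	/* Illustrative umem registration (sketch; error handling elided and
	 * sizes chosen for the example):
	 *
	 *	void *buf;
	 *	posix_memalign(&buf, getpagesize(), 4 * 1024 * 1024);
	 *	struct xdp_umem_reg mr = {
	 *		.addr = (__u64)(uintptr_t)buf,
	 *		.len = 4 * 1024 * 1024,
	 *		.chunk_size = 2048,	// per-frame size
	 *		.headroom = 0,
	 *	};
	 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	 */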
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	case XDP_UMEM_FILL_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	case XDP_UMEM_COMPLETION_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		struct xsk_queue **q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		int entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 
		/* Validate optlen before copying; a later mainline fix adds
		 * exactly this check so that a short optval cannot be
		 * over-read by copy_from_sockptr().
		 */
		if (optlen < sizeof(entries))
			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		if (xs->state != XSK_READY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 			return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 			&xs->cq_tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		err = xsk_init_queue(entries, q, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	}
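	/* Same call shape as the RX/TX rings, e.g.:
	 *
	 *	int entries = 2048;
	 *	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &entries,
	 *		   sizeof(entries));
	 *
	 * The queues are parked in fq_tmp/cq_tmp and handed over to the
	 * buffer pool at bind time (see xsk_bind() above).
	 */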
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	ring->desc = offsetof(struct xdp_rxtx_ring, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	ring->desc = offsetof(struct xdp_umem_ring, desc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) struct xdp_statistics_v1 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	__u64 rx_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	__u64 rx_invalid_descs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	__u64 tx_invalid_descs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) };
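
/* Informational: the current uapi struct xdp_statistics additionally carries
 * rx_ring_full, rx_fill_ring_empty_descs and tx_ring_empty_descs; the
 * optlen-based versioning below mirrors the XDP_UMEM_REG handling.
 */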
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) static int xsk_getsockopt(struct socket *sock, int level, int optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			  char __user *optval, int __user *optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	if (level != SOL_XDP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 		return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	case XDP_STATISTICS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 		struct xdp_statistics stats = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		bool extra_stats = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		size_t stats_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		if (len < sizeof(struct xdp_statistics_v1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		} else if (len < sizeof(stats)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			extra_stats = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			stats_size = sizeof(struct xdp_statistics_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			stats_size = sizeof(stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		stats.rx_dropped = xs->rx_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		if (extra_stats) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 			stats.rx_ring_full = xs->rx_queue_full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 			stats.rx_fill_ring_empty_descs =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 				xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 			stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 			stats.rx_dropped += xs->rx_queue_full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		if (copy_to_user(optval, &stats, stats_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		if (put_user(stats_size, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	}
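	/* Illustrative read-out (sketch):
	 *
	 *	struct xdp_statistics stats;
	 *	socklen_t optlen = sizeof(stats);
	 *	if (!getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen))
	 *		printf("rx_dropped: %llu\n",
	 *		       (unsigned long long)stats.rx_dropped);
	 */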
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	case XDP_MMAP_OFFSETS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		struct xdp_mmap_offsets off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		struct xdp_mmap_offsets_v1 off_v1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 		bool flags_supported = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 		void *to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		if (len < sizeof(off_v1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		else if (len < sizeof(off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			flags_supported = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		if (flags_supported) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 			/* xdp_ring_offset is identical to xdp_ring_offset_v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 			 * except for the flags field added to the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 					       &off.rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 					       &off.tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 					       &off.fr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 					       &off.cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			off.rx.flags = offsetof(struct xdp_rxtx_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 						ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 			off.tx.flags = offsetof(struct xdp_rxtx_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 						ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 			off.fr.flags = offsetof(struct xdp_umem_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 						ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 			off.cr.flags = offsetof(struct xdp_umem_ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 						ptrs.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 			len = sizeof(off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 			to_copy = &off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 			xsk_enter_rxtx_offsets(&off_v1.rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 			xsk_enter_rxtx_offsets(&off_v1.tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 			xsk_enter_umem_offsets(&off_v1.fr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 			xsk_enter_umem_offsets(&off_v1.cr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 			len = sizeof(off_v1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 			to_copy = &off_v1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		if (copy_to_user(optval, to_copy, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	}
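	/* The four xdp_ring_offset entries (rx/tx/fr/cr) are byte offsets into
	 * the corresponding mmap()ed ring: the producer/consumer index words,
	 * the descriptor array and, in the v2 layout, the flags word. A
	 * combined usage sketch follows xsk_mmap() below.
	 */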
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	case XDP_OPTIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		struct xdp_options opts = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		if (len < sizeof(opts))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		if (xs->zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			opts.flags |= XDP_OPTIONS_ZEROCOPY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		len = sizeof(opts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		if (copy_to_user(optval, &opts, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	}
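	/* Illustrative zero-copy probe (sketch):
	 *
	 *	struct xdp_options opts;
	 *	socklen_t optlen = sizeof(opts);
	 *	if (!getsockopt(fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen) &&
	 *	    (opts.flags & XDP_OPTIONS_ZEROCOPY))
	 *		puts("zero-copy mode");
	 */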
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) static int xsk_mmap(struct file *file, struct socket *sock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		    struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	unsigned long size = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	struct xdp_sock *xs = xdp_sk(sock->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	struct xsk_queue *q = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	struct page *qpg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	if (READ_ONCE(xs->state) != XSK_READY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	if (offset == XDP_PGOFF_RX_RING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		q = READ_ONCE(xs->rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	} else if (offset == XDP_PGOFF_TX_RING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 		q = READ_ONCE(xs->tx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		/* Matches the smp_wmb() in XDP_UMEM_REG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 		if (offset == XDP_UMEM_PGOFF_FILL_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 			q = READ_ONCE(xs->fq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 			q = READ_ONCE(xs->cq_tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	if (!q)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	/* Matches the smp_wmb() in xsk_init_queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	qpg = virt_to_head_page(q->ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	if (size > page_size(qpg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	return remap_pfn_range(vma, vma->vm_start, pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 			       size, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) }
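
/* Illustrative userspace counterpart of xsk_mmap() (sketch; error handling
 * elided, "entries" is the ring size set via XDP_RX_RING). Each ring sits at
 * a fixed page offset from <linux/if_xdp.h>, and the XDP_MMAP_OFFSETS values
 * locate the producer/consumer words and the descriptor array within it:
 *
 *	struct xdp_mmap_offsets off;
 *	socklen_t optlen = sizeof(off);
 *	getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
 *
 *	size_t len = off.rx.desc + entries * sizeof(struct xdp_desc);
 *	void *rx = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			MAP_SHARED | MAP_POPULATE, fd, XDP_PGOFF_RX_RING);
 *	__u32 *producer = (__u32 *)((char *)rx + off.rx.producer);
 *	struct xdp_desc *ring = (struct xdp_desc *)((char *)rx + off.rx.desc);
 */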
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) static int xsk_notifier(struct notifier_block *this,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			unsigned long msg, void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	struct net *net = dev_net(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	struct sock *sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	switch (msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	case NETDEV_UNREGISTER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		mutex_lock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 		sk_for_each(sk, &net->xdp.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 			struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 			mutex_lock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 			if (xs->dev == dev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 				sk->sk_err = ENETDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 				if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 					sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 				xsk_unbind_dev(xs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 				/* Clear device references. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 				xp_clear_dev(xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 			mutex_unlock(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		mutex_unlock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	return NOTIFY_DONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
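
/* After NETDEV_UNREGISTER the socket cannot be rebound (its state leaves
 * XSK_READY); userspace observes sk_err as ENETDOWN, e.g. through
 * getsockopt(SO_ERROR) or an EPOLLERR wakeup, and must recreate the socket.
 */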
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) static struct proto xsk_proto = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	.name =		"XDP",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	.owner =	THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	.obj_size =	sizeof(struct xdp_sock),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) static const struct proto_ops xsk_proto_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	.family		= PF_XDP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	.owner		= THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	.release	= xsk_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	.bind		= xsk_bind,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	.connect	= sock_no_connect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	.socketpair	= sock_no_socketpair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	.accept		= sock_no_accept,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	.getname	= sock_no_getname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	.poll		= xsk_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	.ioctl		= sock_no_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	.listen		= sock_no_listen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	.shutdown	= sock_no_shutdown,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	.setsockopt	= xsk_setsockopt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	.getsockopt	= xsk_getsockopt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	.sendmsg	= xsk_sendmsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	.recvmsg	= sock_no_recvmsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	.mmap		= xsk_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	.sendpage	= sock_no_sendpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) static void xsk_destruct(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	struct xdp_sock *xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	if (!xp_put_pool(xs->pool))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 		xdp_put_umem(xs->umem, !xs->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	sk_refcnt_debug_dec(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static int xsk_create(struct net *net, struct socket *sock, int protocol,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		      int kern)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	struct xdp_sock *xs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	struct sock *sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	if (sock->type != SOCK_RAW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		return -ESOCKTNOSUPPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	if (protocol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		return -EPROTONOSUPPORT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	sock->state = SS_UNCONNECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	if (!sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 		return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	sock->ops = &xsk_proto_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	sock_init_data(sock, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	sk->sk_family = PF_XDP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	sk->sk_destruct = xsk_destruct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	sk_refcnt_debug_inc(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	sock_set_flag(sk, SOCK_RCU_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	xs = xdp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	xs->state = XSK_READY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	mutex_init(&xs->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	spin_lock_init(&xs->rx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	INIT_LIST_HEAD(&xs->map_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	spin_lock_init(&xs->map_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	mutex_lock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	sk_add_node_rcu(sk, &net->xdp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	mutex_unlock(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	sock_prot_inuse_add(net, &xsk_proto, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
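
/* From userspace, socket creation requires CAP_NET_RAW in the owning user
 * namespace, type SOCK_RAW and protocol 0 (sketch):
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *	if (fd < 0)
 *		err(1, "socket(AF_XDP)");	// EPERM, ESOCKTNOSUPPORT, ...
 */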
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) static const struct net_proto_family xsk_family_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 	.family = PF_XDP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	.create = xsk_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	.owner	= THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) static struct notifier_block xsk_netdev_notifier = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	.notifier_call	= xsk_notifier,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) static int __net_init xsk_net_init(struct net *net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	mutex_init(&net->xdp.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	INIT_HLIST_HEAD(&net->xdp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static void __net_exit xsk_net_exit(struct net *net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) static struct pernet_operations xsk_net_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	.init = xsk_net_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	.exit = xsk_net_exit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) static int __init xsk_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	int err, cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	err = proto_register(&xsk_proto, 0 /* no slab */);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	err = sock_register(&xsk_family_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		goto out_proto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	err = register_pernet_subsys(&xsk_net_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		goto out_sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	err = register_netdevice_notifier(&xsk_netdev_notifier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		goto out_pernet;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) out_pernet:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	unregister_pernet_subsys(&xsk_net_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) out_sk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	sock_unregister(PF_XDP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) out_proto:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	proto_unregister(&xsk_proto);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) fs_initcall(xsk_init);
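
/* Note: fs_initcall() runs at initcall level 5, ahead of device_initcall()
 * at level 6, so the AF_XDP family is registered before most built-in netdev
 * drivers probe.
 */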