// SPDX-License-Identifier: GPL-2.0-only
/*
 * drivers/net/veth.c
 *
 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
 *
 * Author: Pavel Emelianov <xemul@openvz.org>
 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
 *
 */

#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/u64_stats_sync.h>

#include <net/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/xdp.h>
#include <linux/veth.h>
#include <linux/module.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>

#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

#define VETH_XDP_FLAG		BIT(0)
#define VETH_RING_SIZE		256
#define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

#define VETH_XDP_TX_BULK_SIZE	16

struct veth_stats {
	u64	rx_drops;
	/* xdp */
	u64	xdp_packets;
	u64	xdp_bytes;
	u64	xdp_redirect;
	u64	xdp_drops;
	u64	xdp_tx;
	u64	xdp_tx_err;
	u64	peer_tq_xdp_xmit;
	u64	peer_tq_xdp_xmit_err;
};

struct veth_rq_stats {
	struct veth_stats	vs;
	struct u64_stats_sync	syncp;
};

struct veth_rq {
	struct napi_struct	xdp_napi;
	struct net_device	*dev;
	struct bpf_prog __rcu	*xdp_prog;
	struct xdp_mem_info	xdp_mem;
	struct veth_rq_stats	stats;
	bool			rx_notify_masked;
	struct ptr_ring		xdp_ring;
	struct xdp_rxq_info	xdp_rxq;
};

struct veth_priv {
	struct net_device __rcu	*peer;
	atomic64_t		dropped;
	struct bpf_prog		*_xdp_prog;
	struct veth_rq		*rq;
	unsigned int		requested_headroom;
};

struct veth_xdp_tx_bq {
	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
	unsigned int count;
};

/*
 * ethtool interface
 */

struct veth_q_stat_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};

#define VETH_RQ_STAT(m)	offsetof(struct veth_stats, m)

static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
	{ "xdp_packets",	VETH_RQ_STAT(xdp_packets) },
	{ "xdp_bytes",		VETH_RQ_STAT(xdp_bytes) },
	{ "drops",		VETH_RQ_STAT(rx_drops) },
	{ "xdp_redirect",	VETH_RQ_STAT(xdp_redirect) },
	{ "xdp_drops",		VETH_RQ_STAT(xdp_drops) },
	{ "xdp_tx",		VETH_RQ_STAT(xdp_tx) },
	{ "xdp_tx_errors",	VETH_RQ_STAT(xdp_tx_err) },
};

#define VETH_RQ_STATS_LEN	ARRAY_SIZE(veth_rq_stats_desc)

static const struct veth_q_stat_desc veth_tq_stats_desc[] = {
	{ "xdp_xmit",		VETH_RQ_STAT(peer_tq_xdp_xmit) },
	{ "xdp_xmit_errors",	VETH_RQ_STAT(peer_tq_xdp_xmit_err) },
};

#define VETH_TQ_STATS_LEN	ARRAY_SIZE(veth_tq_stats_desc)

static struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "peer_ifindex" },
};

static int veth_get_link_ksettings(struct net_device *dev,
				   struct ethtool_link_ksettings *cmd)
{
	cmd->base.speed = SPEED_10000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_TP;
	cmd->base.autoneg = AUTONEG_DISABLE;
	return 0;
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
}

static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	char *p = (char *)buf;
	int i, j;

	switch (stringset) {
	case ETH_SS_STATS:
		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		p += sizeof(ethtool_stats_keys);
		for (i = 0; i < dev->real_num_rx_queues; i++) {
			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
				snprintf(p, ETH_GSTRING_LEN,
					 "rx_queue_%u_%.18s",
					 i, veth_rq_stats_desc[j].desc);
				p += ETH_GSTRING_LEN;
			}
		}
		for (i = 0; i < dev->real_num_tx_queues; i++) {
			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
				snprintf(p, ETH_GSTRING_LEN,
					 "tx_queue_%u_%.18s",
					 i, veth_tq_stats_desc[j].desc);
				p += ETH_GSTRING_LEN;
			}
		}
		break;
	}
}

static int veth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ethtool_stats_keys) +
		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
	default:
		return -EOPNOTSUPP;
	}
}

static void veth_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int i, j, idx;

	data[0] = peer ? peer->ifindex : 0;
	idx = 1;
	for (i = 0; i < dev->real_num_rx_queues; i++) {
		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
		const void *stats_base = (void *)&rq_stats->vs;
		unsigned int start;
		size_t offset;

		do {
			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
				offset = veth_rq_stats_desc[j].offset;
				data[idx + j] = *(u64 *)(stats_base + offset);
			}
		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
		idx += VETH_RQ_STATS_LEN;
	}

	if (!peer)
		return;

	rcv_priv = netdev_priv(peer);
	for (i = 0; i < peer->real_num_rx_queues; i++) {
		const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats;
		const void *base = (void *)&rq_stats->vs;
		unsigned int start, tx_idx = idx;
		size_t offset;

		tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN;
		do {
			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
			for (j = 0; j < VETH_TQ_STATS_LEN; j++) {
				offset = veth_tq_stats_desc[j].offset;
				data[tx_idx + j] += *(u64 *)(base + offset);
			}
		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
	}
}

static const struct ethtool_ops veth_ethtool_ops = {
	.get_drvinfo		= veth_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= veth_get_strings,
	.get_sset_count		= veth_get_sset_count,
	.get_ethtool_stats	= veth_get_ethtool_stats,
	.get_link_ksettings	= veth_get_link_ksettings,
	.get_ts_info		= ethtool_op_get_ts_info,
};

/* general routines */

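/* Entries on a queue's xdp_ring are either sk_buff or xdp_frame pointers.
 * xdp_frame pointers are tagged with VETH_XDP_FLAG in the low bit so the
 * consumer can tell the two apart.
 */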
static bool veth_is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VETH_XDP_FLAG;
}

static struct xdp_frame *veth_ptr_to_xdp(void *ptr)
{
	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
}

static void *veth_xdp_to_ptr(struct xdp_frame *xdp)
{
	return (void *)((unsigned long)xdp | VETH_XDP_FLAG);
}

static void veth_ptr_free(void *ptr)
{
	if (veth_is_xdp_frame(ptr))
		xdp_return_frame(veth_ptr_to_xdp(ptr));
	else
		kfree_skb(ptr);
}

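/* Kick the receive queue's NAPI context unless a previous kick is still
 * pending (rx_notify_masked). Producers call this after queueing new
 * entries on the xdp_ring.
 */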
static void __veth_xdp_flush(struct veth_rq *rq)
{
	/* Write ptr_ring before reading rx_notify_masked */
	smp_mb();
	if (!READ_ONCE(rq->rx_notify_masked) &&
	    napi_schedule_prep(&rq->xdp_napi)) {
		WRITE_ONCE(rq->rx_notify_masked, true);
		__napi_schedule(&rq->xdp_napi);
	}
}

static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
{
	if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
		dev_kfree_skb_any(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}

static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct veth_rq *rq, bool xdp)
{
	return __dev_forward_skb(dev, skb) ?: xdp ?
		veth_xdp_rx(rq, skb) :
		netif_rx(skb);
}

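/* Transmit path: packets are handed straight to the peer device. If the
 * peer rx queue has an XDP program attached, the skb is queued on that
 * queue's ptr_ring and processed from NAPI; otherwise it goes through
 * netif_rx().
 */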
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct veth_rq *rq = NULL;
	struct net_device *rcv;
	int length = skb->len;
	bool rcv_xdp = false;
	int rxq;

	rcu_read_lock();
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv)) {
		kfree_skb(skb);
		goto drop;
	}

	rcv_priv = netdev_priv(rcv);
	rxq = skb_get_queue_mapping(skb);
	if (rxq < rcv->real_num_rx_queues) {
		rq = &rcv_priv->rq[rxq];
		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
	}

	skb_tx_timestamp(skb);
	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
		if (!rcv_xdp)
			dev_lstats_add(dev, length);
	} else {
drop:
		atomic64_inc(&priv->dropped);
	}

	if (rcv_xdp)
		__veth_xdp_flush(rq);

	rcu_read_unlock();

	return NETDEV_TX_OK;
}

static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
{
	struct veth_priv *priv = netdev_priv(dev);

	dev_lstats_read(dev, packets, bytes);
	return atomic64_read(&priv->dropped);
}

static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	result->peer_tq_xdp_xmit_err = 0;
	result->xdp_packets = 0;
	result->xdp_tx_err = 0;
	result->xdp_bytes = 0;
	result->rx_drops = 0;
	for (i = 0; i < dev->num_rx_queues; i++) {
		u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err;
		struct veth_rq_stats *stats = &priv->rq[i].stats;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err;
			xdp_tx_err = stats->vs.xdp_tx_err;
			packets = stats->vs.xdp_packets;
			bytes = stats->vs.xdp_bytes;
			drops = stats->vs.rx_drops;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
		result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err;
		result->xdp_tx_err += xdp_tx_err;
		result->xdp_packets += packets;
		result->xdp_bytes += bytes;
		result->rx_drops += drops;
	}
}

static void veth_get_stats64(struct net_device *dev,
			     struct rtnl_link_stats64 *tot)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	struct veth_stats rx;
	u64 packets, bytes;

	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
	tot->tx_bytes = bytes;
	tot->tx_packets = packets;

	veth_stats_rx(&rx, dev);
	tot->tx_dropped += rx.xdp_tx_err;
	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
	tot->rx_bytes = rx.xdp_bytes;
	tot->rx_packets = rx.xdp_packets;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (peer) {
		veth_stats_tx(peer, &packets, &bytes);
		tot->rx_bytes += bytes;
		tot->rx_packets += packets;

		veth_stats_rx(&rx, peer);
		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
		tot->rx_dropped += rx.xdp_tx_err;
		tot->tx_bytes += rx.xdp_bytes;
		tot->tx_packets += rx.xdp_packets;
	}
	rcu_read_unlock();
}

/* fake multicast ability */
static void veth_set_multicast_list(struct net_device *dev)
{
}

static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
				      int buflen)
{
	struct sk_buff *skb;

	skb = build_skb(head, buflen);
	if (!skb)
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

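/* Frames sent via XDP (ndo_xdp_xmit / XDP_TX) are spread over the peer's
 * rx queues based on the CPU doing the transmit.
 */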
static int veth_select_rxq(struct net_device *dev)
{
	return smp_processor_id() % dev->real_num_rx_queues;
}

static struct net_device *veth_peer_dev(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);

	/* Callers must be under RCU read side. */
	return rcu_dereference(priv->peer);
}

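/* Common xdp_frame transmit helper: produce the frames onto the selected
 * peer rx queue's xdp_ring. Frames longer than the peer can receive, or
 * that do not fit in the ring, are dropped and returned. Stats are
 * accounted on the peer rq only for the ndo_xdp_xmit path (ndo_xmit);
 * XDP_TX accounting happens in the bulk-queue flush.
 */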
static int veth_xdp_xmit(struct net_device *dev, int n,
			 struct xdp_frame **frames,
			 u32 flags, bool ndo_xmit)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	int i, ret = -ENXIO, drops = 0;
	struct net_device *rcv;
	unsigned int max_len;
	struct veth_rq *rq;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	rcu_read_lock();
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv))
		goto out;

	rcv_priv = netdev_priv(rcv);
	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
	/* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
	 * side. This means an XDP program is loaded on the peer and the peer
	 * device is up.
	 */
	if (!rcu_access_pointer(rq->xdp_prog))
		goto out;

	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;

	spin_lock(&rq->xdp_ring.producer_lock);
	for (i = 0; i < n; i++) {
		struct xdp_frame *frame = frames[i];
		void *ptr = veth_xdp_to_ptr(frame);

		if (unlikely(frame->len > max_len ||
			     __ptr_ring_produce(&rq->xdp_ring, ptr))) {
			xdp_return_frame_rx_napi(frame);
			drops++;
		}
	}
	spin_unlock(&rq->xdp_ring.producer_lock);

	if (flags & XDP_XMIT_FLUSH)
		__veth_xdp_flush(rq);

	ret = n - drops;
	if (ndo_xmit) {
		u64_stats_update_begin(&rq->stats.syncp);
		rq->stats.vs.peer_tq_xdp_xmit += n - drops;
		rq->stats.vs.peer_tq_xdp_xmit_err += drops;
		u64_stats_update_end(&rq->stats.syncp);
	}

out:
	rcu_read_unlock();

	return ret;
}

static int veth_ndo_xdp_xmit(struct net_device *dev, int n,
			     struct xdp_frame **frames, u32 flags)
{
	int err;

	err = veth_xdp_xmit(dev, n, frames, flags, true);
	if (err < 0) {
		struct veth_priv *priv = netdev_priv(dev);

		atomic64_add(n, &priv->dropped);
	}

	return err;
}

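/* Flush the XDP_TX bulk queue: hand the batched frames to veth_xdp_xmit()
 * and account xdp_tx / xdp_tx_err on the local rx queue.
 */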
static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
{
	int sent, i, err = 0;

	sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false);
	if (sent < 0) {
		err = sent;
		sent = 0;
		for (i = 0; i < bq->count; i++)
			xdp_return_frame(bq->q[i]);
	}
	trace_xdp_bulk_tx(rq->dev, sent, bq->count - sent, err);

	u64_stats_update_begin(&rq->stats.syncp);
	rq->stats.vs.xdp_tx += sent;
	rq->stats.vs.xdp_tx_err += bq->count - sent;
	u64_stats_update_end(&rq->stats.syncp);

	bq->count = 0;
}

static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev);
	struct net_device *rcv;
	struct veth_rq *rcv_rq;

	rcu_read_lock();
	veth_xdp_flush_bq(rq, bq);
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv))
		goto out;

	rcv_priv = netdev_priv(rcv);
	rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)];
	/* xdp_ring is initialized on receive side? */
	if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog)))
		goto out;

	__veth_xdp_flush(rcv_rq);
out:
	rcu_read_unlock();
}

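/* Queue an XDP_TX buffer on the bulk queue; the queue is flushed when it
 * fills up and again at the end of the NAPI poll.
 */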
static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
		       struct veth_xdp_tx_bq *bq)
{
	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);

	if (unlikely(!frame))
		return -EOVERFLOW;

	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
		veth_xdp_flush_bq(rq, bq);

	bq->q[bq->count++] = frame;

	return 0;
}

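/* Run the queue's XDP program on an xdp_frame consumed from the ring.
 * On XDP_PASS the frame is converted back into an skb; XDP_TX and
 * XDP_REDIRECT hand the buffer on and return NULL.
 */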
static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
					struct xdp_frame *frame,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	void *hard_start = frame->data - frame->headroom;
	int len = frame->len, delta = 0;
	struct xdp_frame orig_frame;
	struct bpf_prog *xdp_prog;
	unsigned int headroom;
	struct sk_buff *skb;

	/* bpf_xdp_adjust_head() assures BPF cannot access xdp_frame area */
	hard_start -= sizeof(struct xdp_frame);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (likely(xdp_prog)) {
		struct xdp_buff xdp;
		u32 act;

		xdp_convert_frame_to_buff(frame, &xdp);
		xdp.rxq = &rq->xdp_rxq;

		act = bpf_prog_run_xdp(xdp_prog, &xdp);

		switch (act) {
		case XDP_PASS:
			delta = frame->data - xdp.data;
			len = xdp.data_end - xdp.data;
			break;
		case XDP_TX:
			orig_frame = *frame;
			xdp.rxq->mem = frame->mem;
			if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
				trace_xdp_exception(rq->dev, xdp_prog, act);
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_tx++;
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			orig_frame = *frame;
			xdp.rxq->mem = frame->mem;
			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_redirect++;
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(act);
			fallthrough;
		case XDP_ABORTED:
			trace_xdp_exception(rq->dev, xdp_prog, act);
			fallthrough;
		case XDP_DROP:
			stats->xdp_drops++;
			goto err_xdp;
		}
	}
	rcu_read_unlock();

	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
	skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz);
	if (!skb) {
		xdp_return_frame(frame);
		stats->rx_drops++;
		goto err;
	}

	xdp_release_frame(frame);
	xdp_scrub_frame(frame);
	skb->protocol = eth_type_trans(skb, rq->dev);
err:
	return skb;
err_xdp:
	rcu_read_unlock();
	xdp_return_frame(frame);
xdp_xmit:
	return NULL;
}

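/* Run the queue's XDP program on an skb taken from the ring. If the head
 * is shared, nonlinear or lacks XDP headroom, the packet is first copied
 * into a freshly allocated page so the program can safely adjust it.
 */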
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
					struct sk_buff *skb,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	u32 pktlen, headroom, act, metalen;
	void *orig_data, *orig_data_end;
	struct bpf_prog *xdp_prog;
	int mac_len, delta, off;
	struct xdp_buff xdp;

	skb_orphan(skb);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog)) {
		rcu_read_unlock();
		goto out;
	}

	mac_len = skb->data - skb_mac_header(skb);
	pktlen = skb->len + mac_len;
	headroom = skb_headroom(skb) - mac_len;

	if (skb_shared(skb) || skb_head_is_locked(skb) ||
	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
		struct sk_buff *nskb;
		int size, head_off;
		void *head, *start;
		struct page *page;

		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		if (size > PAGE_SIZE)
			goto drop;

		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page)
			goto drop;

		head = page_address(page);
		start = head + VETH_XDP_HEADROOM;
		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
			page_frag_free(head);
			goto drop;
		}

		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
				      skb->len, PAGE_SIZE);
		if (!nskb) {
			page_frag_free(head);
			goto drop;
		}

		skb_copy_header(nskb, skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);
		consume_skb(skb);
		skb = nskb;
	}

	xdp.data_hard_start = skb->head;
	xdp.data = skb_mac_header(skb);
	xdp.data_end = xdp.data + pktlen;
	xdp.data_meta = xdp.data;
	xdp.rxq = &rq->xdp_rxq;

	/* SKB "head" area always has tailroom for skb_shared_info */
	xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start;
	xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	orig_data = xdp.data;
	orig_data_end = xdp.data_end;

	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = rq->xdp_mem;
		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
			trace_xdp_exception(rq->dev, xdp_prog, act);
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_tx++;
		rcu_read_unlock();
		goto xdp_xmit;
	case XDP_REDIRECT:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = rq->xdp_mem;
		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_redirect++;
		rcu_read_unlock();
		goto xdp_xmit;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(rq->dev, xdp_prog, act);
		fallthrough;
	case XDP_DROP:
		stats->xdp_drops++;
		goto xdp_drop;
	}
	rcu_read_unlock();

	/* check if bpf_xdp_adjust_head was used */
	delta = orig_data - xdp.data;
	off = mac_len + delta;
	if (off > 0)
		__skb_push(skb, off);
	else if (off < 0)
		__skb_pull(skb, -off);
	skb->mac_header -= delta;

	/* check if bpf_xdp_adjust_tail was used */
	off = xdp.data_end - orig_data_end;
	if (off != 0)
		__skb_put(skb, off); /* positive on grow, negative on shrink */
	skb->protocol = eth_type_trans(skb, rq->dev);

	metalen = xdp.data - xdp.data_meta;
	if (metalen)
		skb_metadata_set(skb, metalen);
out:
	return skb;
drop:
	stats->rx_drops++;
xdp_drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
err_xdp:
	rcu_read_unlock();
	page_frag_free(xdp.data);
xdp_xmit:
	return NULL;
}

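/* NAPI receive loop: consume up to @budget entries from the xdp_ring,
 * dispatch each to the xdp_frame or skb handler, and fold the per-poll
 * counters into the queue statistics.
 */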
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) static int veth_xdp_rcv(struct veth_rq *rq, int budget,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) struct veth_xdp_tx_bq *bq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) struct veth_stats *stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) int i, done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) for (i = 0; i < budget; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) void *ptr = __ptr_ring_consume(&rq->xdp_ring);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if (!ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (veth_is_xdp_frame(ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) stats->xdp_bytes += frame->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) skb = veth_xdp_rcv_one(rq, frame, bq, stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) skb = ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) stats->xdp_bytes += skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) napi_gro_receive(&rq->xdp_napi, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) done++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) u64_stats_update_begin(&rq->stats.syncp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) rq->stats.vs.xdp_redirect += stats->xdp_redirect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) rq->stats.vs.xdp_bytes += stats->xdp_bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) rq->stats.vs.xdp_drops += stats->xdp_drops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) rq->stats.vs.rx_drops += stats->rx_drops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) rq->stats.vs.xdp_packets += done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) u64_stats_update_end(&rq->stats.syncp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) return done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
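/* NAPI poll handler: receive via veth_xdp_rcv() and, when less than the
 * budget was used, complete NAPI and clear rx_notify_masked; the
 * smp_store_mb() orders that write against the ptr_ring re-check so a frame
 * enqueued after the ring looked empty still gets the queue rescheduled.
 * Bulked XDP_TX frames and pending redirects are flushed before returning.
 */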
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) static int veth_poll(struct napi_struct *napi, int budget)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) struct veth_rq *rq =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) container_of(napi, struct veth_rq, xdp_napi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) struct veth_stats stats = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) struct veth_xdp_tx_bq bq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) int done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) bq.count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) xdp_set_return_frame_no_direct();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) done = veth_xdp_rcv(rq, budget, &bq, &stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (done < budget && napi_complete_done(napi, done)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) /* Write rx_notify_masked before reading ptr_ring */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) smp_store_mb(rq->rx_notify_masked, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) if (napi_schedule_prep(&rq->xdp_napi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) WRITE_ONCE(rq->rx_notify_masked, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) __napi_schedule(&rq->xdp_napi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) if (stats.xdp_tx > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) veth_xdp_flush(rq, &bq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if (stats.xdp_redirect > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) xdp_do_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) xdp_clear_return_frame_no_direct();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) return done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) static int veth_napi_add(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) int err, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) goto err_xdp_ring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) napi_enable(&rq->xdp_napi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) err_xdp_ring:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) for (i--; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) static void veth_napi_del(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) napi_disable(&rq->xdp_napi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) __netif_napi_del(&rq->xdp_napi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) synchronize_net();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) rq->rx_notify_masked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
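/* Called under RTNL from veth_open() and veth_xdp_set(). Registers an
 * xdp_rxq_info with the MEM_TYPE_PAGE_SHARED memory model for every real
 * rx queue (saving the original mem info so it can be restored on teardown),
 * sets up the per-queue rings and NAPI via veth_napi_add(), and finally
 * publishes priv->_xdp_prog to each queue with rcu_assign_pointer().
 */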
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) static int veth_enable_xdp(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) int err, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) goto err_rxq_reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) MEM_TYPE_PAGE_SHARED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) goto err_reg_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) /* Save original mem info as it can be overwritten */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) rq->xdp_mem = rq->xdp_rxq.mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) err = veth_napi_add(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) goto err_rxq_reg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) for (i = 0; i < dev->real_num_rx_queues; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) err_reg_mem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) err_rxq_reg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) for (i--; i >= 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) static void veth_disable_xdp(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) for (i = 0; i < dev->real_num_rx_queues; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) veth_napi_del(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) for (i = 0; i < dev->real_num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) struct veth_rq *rq = &priv->rq[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) rq->xdp_rxq.mem = rq->xdp_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) xdp_rxq_info_unreg(&rq->xdp_rxq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) static int veth_open(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) struct net_device *peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (!peer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (priv->_xdp_prog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) err = veth_enable_xdp(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (peer->flags & IFF_UP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) netif_carrier_on(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) netif_carrier_on(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static int veth_close(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct net_device *peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) netif_carrier_off(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) if (peer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) netif_carrier_off(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (priv->_xdp_prog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) veth_disable_xdp(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) static int is_valid_veth_mtu(int mtu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) static int veth_alloc_queues(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) if (!priv->rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) for (i = 0; i < dev->num_rx_queues; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) priv->rq[i].dev = dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) u64_stats_init(&priv->rq[i].stats.syncp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) static void veth_free_queues(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) kfree(priv->rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) static int veth_dev_init(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (!dev->lstats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) err = veth_alloc_queues(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) free_percpu(dev->lstats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) static void veth_dev_free(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) veth_free_queues(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) free_percpu(dev->lstats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) #ifdef CONFIG_NET_POLL_CONTROLLER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) static void veth_poll_controller(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) /* veth only receives frames when its peer sends one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * Since reception does not depend on disabling irqs, we are guaranteed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * never to have pending data when we poll for it, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * there is nothing to do here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * We still need this stub so that netpoll recognizes us as an interface
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) * that supports polling, which lets bridge devices in virt setups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * keep using netconsole.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) #endif /* CONFIG_NET_POLL_CONTROLLER */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) static int veth_get_iflink(const struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) int iflink;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) peer = rcu_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) iflink = peer ? peer->ifindex : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) return iflink;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) static netdev_features_t veth_fix_features(struct net_device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) netdev_features_t features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) if (peer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) struct veth_priv *peer_priv = netdev_priv(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) if (peer_priv->_xdp_prog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) features &= ~NETIF_F_GSO_SOFTWARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) return features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)
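/* Record the headroom requested for this device and set needed_headroom on
 * both ends of the pair to the larger of the two sides' requests.
 */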
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) struct veth_priv *peer_priv, *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) if (new_hr < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) new_hr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) peer = rcu_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (unlikely(!peer))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) peer_priv = netdev_priv(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) priv->requested_headroom = new_hr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) dev->needed_headroom = new_hr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) peer->needed_headroom = new_hr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)
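/* Install or remove the XDP program (RTNL is held via ndo_bpf). Attaching
 * requires a connected peer whose MTU fits in one page after
 * VETH_XDP_HEADROOM and the skb_shared_info tail, plus at least as many
 * local rx queues as the peer has tx queues. While a program is attached the
 * peer's software GSO features are masked and its max_mtu pinned, since this
 * XDP path expects linear, page-sized buffers; both are restored when the
 * program is removed.
 */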
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct netlink_ext_ack *extack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) struct bpf_prog *old_prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) unsigned int max_mtu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) old_prog = priv->_xdp_prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) priv->_xdp_prog = prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) if (prog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) if (!peer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) err = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) peer->hard_header_len -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) if (peer->mtu > max_mtu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) err = -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) NL_SET_ERR_MSG_MOD(extack, "XDP expects the number of rx queues to be at least the number of peer tx queues");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) err = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) if (dev->flags & IFF_UP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) err = veth_enable_xdp(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (!old_prog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) peer->max_mtu = max_mtu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (old_prog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) if (!prog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (dev->flags & IFF_UP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) veth_disable_xdp(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) if (peer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) peer->hw_features |= NETIF_F_GSO_SOFTWARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) peer->max_mtu = ETH_MAX_MTU;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) bpf_prog_put(old_prog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) if ((!!old_prog ^ !!prog) && peer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) netdev_update_features(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) priv->_xdp_prog = old_prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) switch (xdp->command) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) case XDP_SETUP_PROG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return veth_xdp_set(dev, xdp->prog, xdp->extack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static const struct net_device_ops veth_netdev_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) .ndo_init = veth_dev_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) .ndo_open = veth_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) .ndo_stop = veth_close,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) .ndo_start_xmit = veth_xmit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) .ndo_get_stats64 = veth_get_stats64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) .ndo_set_rx_mode = veth_set_multicast_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) .ndo_set_mac_address = eth_mac_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) #ifdef CONFIG_NET_POLL_CONTROLLER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) .ndo_poll_controller = veth_poll_controller,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) .ndo_get_iflink = veth_get_iflink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) .ndo_fix_features = veth_fix_features,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) .ndo_features_check = passthru_features_check,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) .ndo_set_rx_headroom = veth_set_rx_headroom,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) .ndo_bpf = veth_xdp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) .ndo_xdp_xmit = veth_ndo_xdp_xmit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) .ndo_get_peer_dev = veth_peer_dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static void veth_setup(struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) ether_setup(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) dev->priv_flags &= ~IFF_TX_SKB_SHARING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) dev->priv_flags |= IFF_NO_QUEUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) dev->priv_flags |= IFF_PHONY_HEADROOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) dev->netdev_ops = &veth_netdev_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) dev->ethtool_ops = &veth_ethtool_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) dev->features |= NETIF_F_LLTX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) dev->features |= VETH_FEATURES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) dev->vlan_features = dev->features &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) ~(NETIF_F_HW_VLAN_CTAG_TX |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) NETIF_F_HW_VLAN_STAG_TX |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) NETIF_F_HW_VLAN_CTAG_RX |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) NETIF_F_HW_VLAN_STAG_RX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) dev->needs_free_netdev = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) dev->priv_destructor = veth_dev_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) dev->max_mtu = ETH_MAX_MTU;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) dev->hw_features = VETH_FEATURES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) dev->hw_enc_features = VETH_FEATURES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) * netlink interface
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275)
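/* Illustrative userspace usage (iproute2), not part of this file:
 *
 *   ip link add veth0 type veth peer name veth1
 *
 * The optional peer attributes reach veth_newlink() below as the
 * VETH_INFO_PEER nested attribute.
 */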
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) struct netlink_ext_ack *extack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (tb[IFLA_ADDRESS]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) return -EADDRNOTAVAIL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (tb[IFLA_MTU]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) static struct rtnl_link_ops veth_link_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
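/* Create a veth pair: the peer device is created and registered first,
 * possibly in a different netns selected from the peer attributes, then
 * @dev is registered, and finally the two priv->peer pointers are tied
 * together under RCU. Error paths unwind whichever device was registered.
 */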
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) static int veth_newlink(struct net *src_net, struct net_device *dev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) struct nlattr *tb[], struct nlattr *data[],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) struct netlink_ext_ack *extack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) struct veth_priv *priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) char ifname[IFNAMSIZ];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) unsigned char name_assign_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) struct ifinfomsg *ifmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) struct net *net;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * create and register peer first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) if (data != NULL && data[VETH_INFO_PEER] != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) struct nlattr *nla_peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) nla_peer = data[VETH_INFO_PEER];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) ifmp = nla_data(nla_peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) err = rtnl_nla_parse_ifla(peer_tb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) nla_data(nla_peer) + sizeof(struct ifinfomsg),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) nla_len(nla_peer) - sizeof(struct ifinfomsg),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) err = veth_validate(peer_tb, NULL, extack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) tbp = peer_tb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) ifmp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) tbp = tb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) if (ifmp && tbp[IFLA_IFNAME]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) name_assign_type = NET_NAME_USER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) name_assign_type = NET_NAME_ENUM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) net = rtnl_link_get_net(src_net, tbp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) if (IS_ERR(net))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) return PTR_ERR(net);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) peer = rtnl_create_link(net, ifname, name_assign_type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) &veth_link_ops, tbp, extack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (IS_ERR(peer)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) put_net(net);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) return PTR_ERR(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (!ifmp || !tbp[IFLA_ADDRESS])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) eth_hw_addr_random(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) if (ifmp && (dev->ifindex != 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) peer->ifindex = ifmp->ifi_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) peer->gso_max_size = dev->gso_max_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) peer->gso_max_segs = dev->gso_max_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) err = register_netdevice(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) put_net(net);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) net = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) goto err_register_peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) netif_carrier_off(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) err = rtnl_configure_link(peer, ifmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) goto err_configure_peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) * register dev last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) * note that, since we've just registered a new device, the dev's name
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) * should be re-allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) if (tb[IFLA_ADDRESS] == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) eth_hw_addr_random(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) if (tb[IFLA_IFNAME])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) err = register_netdevice(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) goto err_register_dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) netif_carrier_off(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * tie the devices together
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) rcu_assign_pointer(priv->peer, peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) priv = netdev_priv(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) rcu_assign_pointer(priv->peer, dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) err_register_dev:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) /* nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) err_configure_peer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) unregister_netdevice(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) err_register_peer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) free_netdev(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) static void veth_dellink(struct net_device *dev, struct list_head *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) struct veth_priv *priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) struct net_device *peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) /* Note: dellink() is called from default_device_exit_batch(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * before an RCU synchronization point. The devices are guaranteed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * not to be freed before one RCU grace period has elapsed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) RCU_INIT_POINTER(priv->peer, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) unregister_netdevice_queue(dev, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) if (peer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) priv = netdev_priv(peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) RCU_INIT_POINTER(priv->peer, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) unregister_netdevice_queue(peer, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) static struct net *veth_get_link_net(const struct net_device *dev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) struct veth_priv *priv = netdev_priv(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) struct net_device *peer = rtnl_dereference(priv->peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) return peer ? dev_net(peer) : dev_net(dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) static struct rtnl_link_ops veth_link_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) .kind = DRV_NAME,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) .priv_size = sizeof(struct veth_priv),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) .setup = veth_setup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) .validate = veth_validate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) .newlink = veth_newlink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) .dellink = veth_dellink,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) .policy = veth_policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) .maxtype = VETH_INFO_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) .get_link_net = veth_get_link_net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * init/fini
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) static __init int veth_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) return rtnl_link_register(&veth_link_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) static __exit void veth_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) rtnl_link_unregister(&veth_link_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) module_init(veth_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) module_exit(veth_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) MODULE_LICENSE("GPL v2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) MODULE_ALIAS_RTNL_LINK(DRV_NAME);