^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This program is free software; you can redistribute it and/or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * modify it under the terms of version 2 of the GNU General Public
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * License as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * This program is distributed in the hope that it will be useful, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * WITHOUT ANY WARRANTY; without even the implied warranty of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * General Public License for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define KBUILD_MODNAME "foo"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <uapi/linux/bpf.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/in.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/if_ether.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/if_packet.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/if_vlan.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/ip.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/ipv6.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <bpf/bpf_helpers.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) __uint(type, BPF_MAP_TYPE_DEVMAP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) __uint(key_size, sizeof(int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) __uint(value_size, sizeof(int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) __uint(max_entries, 64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) } xdp_tx_ports SEC(".maps");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) /* from include/net/ip.h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static __always_inline int ip_decrease_ttl(struct iphdr *iph)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) u32 check = (__force u32)iph->check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) check += (__force u32)htons(0x0100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) iph->check = (__force __sum16)(check + (check >= 0xFFFF));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) return --iph->ttl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) void *data_end = (void *)(long)ctx->data_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) void *data = (void *)(long)ctx->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) struct bpf_fib_lookup fib_params;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) struct ethhdr *eth = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) struct ipv6hdr *ip6h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) struct iphdr *iph;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) u16 h_proto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) u64 nh_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) int rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) nh_off = sizeof(*eth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) if (data + nh_off > data_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) return XDP_DROP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) __builtin_memset(&fib_params, 0, sizeof(fib_params));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) h_proto = eth->h_proto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) if (h_proto == htons(ETH_P_IP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) iph = data + nh_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) if (iph + 1 > data_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) return XDP_DROP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) if (iph->ttl <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) return XDP_PASS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) fib_params.family = AF_INET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) fib_params.tos = iph->tos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) fib_params.l4_protocol = iph->protocol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) fib_params.sport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) fib_params.dport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) fib_params.tot_len = ntohs(iph->tot_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) fib_params.ipv4_src = iph->saddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) fib_params.ipv4_dst = iph->daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) } else if (h_proto == htons(ETH_P_IPV6)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) ip6h = data + nh_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) if (ip6h + 1 > data_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) return XDP_DROP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) if (ip6h->hop_limit <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) return XDP_PASS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) fib_params.family = AF_INET6;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) fib_params.l4_protocol = ip6h->nexthdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) fib_params.sport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) fib_params.dport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) fib_params.tot_len = ntohs(ip6h->payload_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) *src = ip6h->saddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) *dst = ip6h->daddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) return XDP_PASS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) fib_params.ifindex = ctx->ingress_ifindex;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * Some rc (return codes) from bpf_fib_lookup() are important,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * to understand how this XDP-prog interacts with network stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * BPF_FIB_LKUP_RET_NO_NEIGH:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * Even if route lookup was a success, then the MAC-addresses are also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * needed. This is obtained from arp/neighbour table, but if table is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * doing ARP lookup directly from XDP, then send packet to normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * network stack via XDP_PASS and expect it will do ARP resolution.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * BPF_FIB_LKUP_RET_FWD_DISABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * enabled this on ingress device.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /* Verify egress index has been configured as TX-port.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * (Note: User can still have inserted an egress ifindex that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * doesn't support XDP xmit, which will result in packet drops).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * Note: lookup in devmap supported since 0cdbb4b09a0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * If not supported will fail with:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) return XDP_PASS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (h_proto == htons(ETH_P_IP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) ip_decrease_ttl(iph);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) else if (h_proto == htons(ETH_P_IPV6))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ip6h->hop_limit--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) return XDP_PASS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) SEC("xdp_fwd")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) int xdp_fwd_prog(struct xdp_md *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return xdp_fwd_flags(ctx, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) SEC("xdp_fwd_direct")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) int xdp_fwd_direct_prog(struct xdp_md *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) char _license[] SEC("license") = "GPL";