^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* Copyright (c) 2017 Facebook
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * This program is free software; you can redistribute it and/or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * modify it under the terms of version 2 of the GNU General Public
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * License as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * BPF program to set congestion control to dctcp when both hosts are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * in the same datacenter (as determined by IPv6 prefix).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <uapi/linux/bpf.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <uapi/linux/tcp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <uapi/linux/if_ether.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <uapi/linux/if_packet.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <uapi/linux/ip.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/socket.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <bpf/bpf_helpers.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <bpf/bpf_endian.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define DEBUG 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) SEC("sockops")
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) int bpf_cong(struct bpf_sock_ops *skops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) char cong[] = "dctcp";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) int rv = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /* For testing purposes, only execute rest of BPF program
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * if neither port numberis 55601
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) if (bpf_ntohl(skops->remote_port) != 55601 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) skops->local_port != 55601) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) skops->reply = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) op = (int) skops->op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) bpf_printk("BPF command: %d\n", op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) /* Check if both hosts are in the same datacenter. For this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * example they are if the 1st 5.5 bytes in the IPv6 address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * are the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if (skops->family == AF_INET6 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) skops->local_ip6[0] == skops->remote_ip6[0] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) case BPF_SOCK_OPS_NEEDS_ECN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) rv = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) cong, sizeof(cong));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) cong, sizeof(cong));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) rv = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) rv = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #ifdef DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) bpf_printk("Returning %d\n", rv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) skops->reply = rv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) char _license[] SEC("license") = "GPL"; /* License string tagged SEC("license"); presumably read by the BPF loader to allow GPL-only helpers - confirm. */