// SPDX-License-Identifier: GPL-2.0-only
/*
 * TCP NV: TCP with Congestion Avoidance
 *
 * TCP-NV is a successor of TCP-Vegas that has been developed to
 * deal with the issues that occur in modern networks.
 * Like TCP-Vegas, TCP-NV supports true congestion avoidance,
 * the ability to detect congestion before packet losses occur.
 * When congestion (queue buildup) starts to occur, TCP-NV
 * predicts what the cwnd size should be for the current
 * throughput and it reduces the cwnd proportionally to
 * the difference between the current cwnd and the predicted cwnd.
 *
 * NV is only recommended for traffic within a data center, and when
 * all the flows are NV (at least those within the data center). This
 * is due to the inherent unfairness between flows using losses to
 * detect congestion (congestion control) and those that use queue
 * buildup to detect congestion (congestion avoidance).
 *
 * Note: High NIC coalescence values may lower the performance of NV
 * due to the increased noise in RTT values. In particular, we have
 * seen issues with rx-frames values greater than 8.
 *
 * TODO:
 * 1) Add mechanism to deal with reverse congestion.
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>

/* TCP NV parameters
 *
 * nv_pad		Max number of queued packets allowed in network
 * nv_pad_buffer	Do not grow cwnd if cwnd is this close to nv_pad
 * nv_reset_period	How often (in seconds) to reset min_rtt
 * nv_min_cwnd		Don't decrease cwnd below this if there are no losses
 * nv_cong_dec_mult	Decrease cwnd by X% (30%) of congestion when detected
 * nv_ssthresh_factor	On congestion set ssthresh to this * <desired cwnd> / 8
 * nv_rtt_factor	RTT averaging factor
 * nv_loss_dec_factor	Decrease cwnd to this (80%) when losses occur
 * nv_dec_eval_min_calls	Wait this many RTT measurements before dec cwnd
 * nv_inc_eval_min_calls	Wait this many RTT measurements before inc cwnd
 * nv_ssthresh_eval_min_calls	Wait this many RTT measurements before stopping
 *				slow-start due to congestion
 * nv_stop_rtt_cnt	Only grow cwnd for this many RTTs after non-congestion
 * nv_rtt_min_cnt	Wait this many RTTs before making a congestion decision
 * nv_cwnd_growth_rate_neg
 * nv_cwnd_growth_rate_pos
 *	How quickly to double the cwnd growth rate (not cwnd itself) when
 *	not congested. One value (nv_cwnd_growth_rate_neg) for when
 *	rate < 1 pkt/RTT (after losses). The other (nv_cwnd_growth_rate_pos)
 *	otherwise.
 */

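/* A note on fixed-point scaling (summary inferred from the code below, kept
 * here for convenience): nv_cong_dec_mult is scaled by 128 (used with >> 7),
 * nv_rtt_factor by 256 (>> 8), nv_loss_dec_factor by 1024 (>> 10) and
 * nv_ssthresh_factor by 8 (>> 3). For example, the default
 * nv_cong_dec_mult = 30 * 128 / 100 = 38, and 38 / 128 is roughly 30%.
 */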
static int nv_pad __read_mostly = 10;
static int nv_pad_buffer __read_mostly = 2;
static int nv_reset_period __read_mostly = 5; /* in seconds */
static int nv_min_cwnd __read_mostly = 2;
static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
static int nv_cwnd_growth_rate_neg __read_mostly = 8;
static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
static int nv_dec_eval_min_calls __read_mostly = 60;
static int nv_inc_eval_min_calls __read_mostly = 20;
static int nv_ssthresh_eval_min_calls __read_mostly = 30;
static int nv_stop_rtt_cnt __read_mostly = 10;
static int nv_rtt_min_cnt __read_mostly = 2;

module_param(nv_pad, int, 0644);
MODULE_PARM_DESC(nv_pad, "max queued packets allowed in network");
module_param(nv_reset_period, int, 0644);
MODULE_PARM_DESC(nv_reset_period, "nv_min_rtt reset period (secs)");
module_param(nv_min_cwnd, int, 0644);
MODULE_PARM_DESC(nv_min_cwnd, "NV will not decrease cwnd below this value"
		 " without losses");

/* TCP NV Parameters */
struct tcpnv {
	unsigned long nv_min_rtt_reset_jiffies;	/* when to switch to
						 * nv_min_rtt_new */
	s8  cwnd_growth_factor;	/* Current cwnd growth factor,
				 * < 0 => less than 1 packet/RTT */
	u8  available8;
	u16 available16;
	u8  nv_allow_cwnd_growth:1,	/* whether cwnd can grow */
		nv_reset:1,		/* whether to reset values */
		nv_catchup:1;		/* whether we are growing because
					 * of temporary cwnd decrease */
	u8  nv_eval_call_cnt;	/* call count since last eval */
	u8  nv_min_cwnd;	/* nv won't make a ca decision if cwnd is
				 * smaller than this. It may grow to handle
				 * TSO, LRO and interrupt coalescence because
				 * with these a small cwnd cannot saturate
				 * the link. Note that this is different from
				 * the file local nv_min_cwnd */
	u8  nv_rtt_cnt;		/* RTTs without making ca decision */
	u32 nv_last_rtt;	/* last rtt */
	u32 nv_min_rtt;		/* active min rtt. Used to determine slope */
	u32 nv_min_rtt_new;	/* min rtt for future use */
	u32 nv_base_rtt;	/* If non-zero it represents the threshold for
				 * congestion */
	u32 nv_lower_bound_rtt;	/* Used in conjunction with nv_base_rtt. It is
				 * set to 80% of nv_base_rtt. It helps reduce
				 * unfairness between flows */
	u32 nv_rtt_max_rate;	/* max rate seen during current RTT */
	u32 nv_rtt_start_seq;	/* current RTT ends when packet arrives
				 * acking beyond nv_rtt_start_seq */
	u32 nv_last_snd_una;	/* Previous value of tp->snd_una. It is
				 * used to determine bytes acked since last
				 * call to tcpnv_acked */
	u32 nv_no_cong_cnt;	/* Consecutive no congestion decisions */
};

#define NV_INIT_RTT	  U32_MAX
#define NV_MIN_CWND	  4
#define NV_MIN_CWND_GROW  2
#define NV_TSO_CWND_BOUND 80

static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	ca->nv_reset = 0;
	ca->nv_no_cong_cnt = 0;
	ca->nv_rtt_cnt = 0;
	ca->nv_last_rtt = 0;
	ca->nv_rtt_max_rate = 0;
	ca->nv_rtt_start_seq = tp->snd_una;
	ca->nv_eval_call_cnt = 0;
	ca->nv_last_snd_una = tp->snd_una;
}

static void tcpnv_init(struct sock *sk)
{
	struct tcpnv *ca = inet_csk_ca(sk);
	int base_rtt;

	tcpnv_reset(ca, sk);

	/* See if base_rtt is available from socket_ops bpf program.
	 * It is meant to be used in environments, such as communication
	 * within a datacenter, where we have reasonable estimates of
	 * RTTs
	 */
	base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
	if (base_rtt > 0) {
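		/* Illustrative note: 205 / 256 is about 0.80, so the lower
		 * bound ends up at roughly 80% of base_rtt. For example
		 * (hypothetical value), base_rtt = 1000 usec gives a lower
		 * bound of about 800 usec.
		 */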
		ca->nv_base_rtt = base_rtt;
		ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
	} else {
		ca->nv_base_rtt = 0;
		ca->nv_lower_bound_rtt = 0;
	}

	ca->nv_allow_cwnd_growth = 1;
	ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
	ca->nv_min_rtt = NV_INIT_RTT;
	ca->nv_min_rtt_new = NV_INIT_RTT;
	ca->nv_min_cwnd = NV_MIN_CWND;
	ca->nv_catchup = 0;
	ca->cwnd_growth_factor = 0;
}

/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
 * bounds to RTT.
 */
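/* Illustrative example (hypothetical values): with nv_base_rtt = 1000 and
 * nv_lower_bound_rtt = 800 (usec), a sample of 700 is clamped up to 800,
 * a sample of 1200 is clamped down to 1000, and 900 is returned unchanged.
 */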
inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
{
	if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
		return ca->nv_lower_bound_rtt;
	else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
		return ca->nv_base_rtt;
	else
		return val;
}

static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcpnv *ca = inet_csk_ca(sk);
	u32 cnt;

	if (!tcp_is_cwnd_limited(sk))
		return;

	/* Only grow cwnd if NV has not detected congestion */
	if (!ca->nv_allow_cwnd_growth)
		return;

	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			return;
	}

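	/* Roughly (assuming tcp_cong_avoid_ai() adds about one segment per
	 * cnt acked segments): a positive cwnd_growth_factor f uses
	 * cnt = cwnd >> f, i.e. cwnd grows by about 2^f segments per RTT
	 * (the max(4U, ...) below caps this at about cwnd / 4 per RTT);
	 * a negative factor uses cnt = cwnd << -f, i.e. about one segment
	 * every 2^-f RTTs. f == 0 matches Reno's one segment per RTT.
	 */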
	if (ca->cwnd_growth_factor < 0) {
		cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
		tcp_cong_avoid_ai(tp, cnt, acked);
	} else {
		cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
		tcp_cong_avoid_ai(tp, cnt, acked);
	}
}

static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

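	/* nv_loss_dec_factor is scaled by 1024: 819 / 1024 is about 80%.
	 * Worked example (illustrative): snd_cwnd = 100 gives
	 * (100 * 819) >> 10 = 79, so ssthresh drops to roughly 80% of cwnd.
	 */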
	return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
}

static void tcpnv_state(struct sock *sk, u8 new_state)
{
	struct tcpnv *ca = inet_csk_ca(sk);

	if (new_state == TCP_CA_Open && ca->nv_reset) {
		tcpnv_reset(ca, sk);
	} else if (new_state == TCP_CA_Loss || new_state == TCP_CA_CWR ||
		   new_state == TCP_CA_Recovery) {
		ca->nv_reset = 1;
		ca->nv_allow_cwnd_growth = 0;
		if (new_state == TCP_CA_Loss) {
			/* Reset cwnd growth factor to Reno value */
			if (ca->cwnd_growth_factor > 0)
				ca->cwnd_growth_factor = 0;
			/* Decrease growth rate if allowed */
			if (nv_cwnd_growth_rate_neg > 0 &&
			    ca->cwnd_growth_factor > -8)
				ca->cwnd_growth_factor--;
		}
	}
}

/* Do congestion avoidance calculations for TCP-NV
 */
static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcpnv *ca = inet_csk_ca(sk);
	unsigned long now = jiffies;
	u64 rate64;
	u32 rate, max_win, cwnd_by_slope;
	u32 avg_rtt;
	u32 bytes_acked = 0;

	/* Some calls are for duplicates without timestamps */
	if (sample->rtt_us < 0)
		return;

	/* If not in TCP_CA_Open or TCP_CA_Disorder states, skip. */
	if (icsk->icsk_ca_state != TCP_CA_Open &&
	    icsk->icsk_ca_state != TCP_CA_Disorder)
		return;

	/* Stop cwnd growth if we were in catch up mode */
	if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
		ca->nv_catchup = 0;
		ca->nv_allow_cwnd_growth = 0;
	}

	bytes_acked = tp->snd_una - ca->nv_last_snd_una;
	ca->nv_last_snd_una = tp->snd_una;

	if (sample->in_flight == 0)
		return;

	/* Calculate moving average of RTT */
	if (nv_rtt_factor > 0) {
		if (ca->nv_last_rtt > 0) {
			avg_rtt = (((u64)sample->rtt_us) * nv_rtt_factor +
				   ((u64)ca->nv_last_rtt)
				   * (256 - nv_rtt_factor)) >> 8;
		} else {
			avg_rtt = sample->rtt_us;
			ca->nv_min_rtt = avg_rtt << 1;
		}
		ca->nv_last_rtt = avg_rtt;
	} else {
		avg_rtt = sample->rtt_us;
	}

	/* rate in 100's bits per second */
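	/* Where the 80000 comes from (assuming sample->in_flight is in
	 * bytes, as the scaling here and the slope formula below imply):
	 * bytes * 8 bits/byte divided by (avg_rtt * 1e-6 sec) gives bits
	 * per second; dividing by 100 for units of 100 bit/sec yields
	 * bytes * 8 * 1000000 / 100 / avg_rtt = bytes * 80000 / avg_rtt.
	 */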
	rate64 = ((u64)sample->in_flight) * 80000;
	do_div(rate64, avg_rtt ?: 1);
	rate = (u32)rate64;

	/* Remember the maximum rate seen during this RTT
	 * Note: It may be more than one RTT. This function should be
	 * called at least nv_dec_eval_min_calls times.
	 */
	if (ca->nv_rtt_max_rate < rate)
		ca->nv_rtt_max_rate = rate;

	/* We have valid information, increment counter */
	if (ca->nv_eval_call_cnt < 255)
		ca->nv_eval_call_cnt++;

	/* Apply bounds to rtt. Only used to update min_rtt */
	avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);

	/* update min rtt if necessary */
	if (avg_rtt < ca->nv_min_rtt)
		ca->nv_min_rtt = avg_rtt;

	/* update future min_rtt if necessary */
	if (avg_rtt < ca->nv_min_rtt_new)
		ca->nv_min_rtt_new = avg_rtt;

	/* nv_min_rtt is updated with the minimum (possibly averaged) rtt
	 * seen in the last nv_reset_period seconds (i.e. a warm reset).
	 * This new nv_min_rtt will continue to be updated and used for
	 * another nv_reset_period seconds, when it will be updated again.
	 * In practice we introduce some randomness, so the actual period used
	 * is chosen randomly from the range:
	 * [nv_reset_period*3/4, nv_reset_period*5/4)
	 */
	if (time_after_eq(now, ca->nv_min_rtt_reset_jiffies)) {
		unsigned char rand;

		ca->nv_min_rtt = ca->nv_min_rtt_new;
		ca->nv_min_rtt_new = NV_INIT_RTT;
		get_random_bytes(&rand, 1);
		ca->nv_min_rtt_reset_jiffies =
			now + ((nv_reset_period * (384 + rand) * HZ) >> 9);
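		/* Sanity check of the range (illustrative): rand is 0..255,
		 * so (384 + rand) >> 9 scales the period by 384/512 = 0.75
		 * up to 639/512, just under 1.25, matching the [3/4, 5/4)
		 * range described above.
		 */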
		/* Every so often we decrease ca->nv_min_cwnd in case previous
		 * value is no longer accurate.
		 */
		ca->nv_min_cwnd = max(ca->nv_min_cwnd / 2, NV_MIN_CWND);
	}

	/* Once per RTT check if we need to do congestion avoidance */
	if (before(ca->nv_rtt_start_seq, tp->snd_una)) {
		ca->nv_rtt_start_seq = tp->snd_nxt;
		if (ca->nv_rtt_cnt < 0xff)
			/* Increase counter for RTTs without CA decision */
			ca->nv_rtt_cnt++;

		/* If this function is only called once within an RTT
		 * the cwnd is probably too small (in some cases due to
		 * tso, lro or interrupt coalescence), so we increase
		 * ca->nv_min_cwnd.
		 */
		if (ca->nv_eval_call_cnt == 1 &&
		    bytes_acked >= (ca->nv_min_cwnd - 1) * tp->mss_cache &&
		    ca->nv_min_cwnd < (NV_TSO_CWND_BOUND + 1)) {
			ca->nv_min_cwnd = min(ca->nv_min_cwnd
					      + NV_MIN_CWND_GROW,
					      NV_TSO_CWND_BOUND + 1);
			ca->nv_rtt_start_seq = tp->snd_nxt +
				ca->nv_min_cwnd * tp->mss_cache;
			ca->nv_eval_call_cnt = 0;
			ca->nv_allow_cwnd_growth = 1;
			return;
		}

		/* Find the ideal cwnd for current rate from slope
		 * slope = 80000.0 * mss / nv_min_rtt
		 * cwnd_by_slope = nv_rtt_max_rate / slope
		 */
		cwnd_by_slope = (u32)
			div64_u64(((u64)ca->nv_rtt_max_rate) * ca->nv_min_rtt,
				  80000ULL * tp->mss_cache);
		max_win = cwnd_by_slope + nv_pad;
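		/* Worked example (hypothetical numbers): at 10 Gbit/s,
		 * nv_rtt_max_rate = 10^10 / 100 = 10^8 (units of 100 bit/s).
		 * With nv_min_rtt = 100 usec and mss_cache = 1448 bytes,
		 * cwnd_by_slope = 10^8 * 100 / (80000 * 1448), roughly 86
		 * packets, so max_win is about 86 + nv_pad (10) = 96.
		 */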

		/* If cwnd > max_win, decrease cwnd
		 * if cwnd < max_win, grow cwnd
		 * else leave the same
		 */
		if (tp->snd_cwnd > max_win) {
			/* there is congestion, check that it is ok
			 * to make a CA decision
			 * 1. We should have at least nv_dec_eval_min_calls
			 *    data points before making a CA decision
			 * 2. We only make a congestion decision after
			 *    nv_rtt_min_cnt RTTs
			 */
			if (ca->nv_rtt_cnt < nv_rtt_min_cnt) {
				return;
			} else if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
				if (ca->nv_eval_call_cnt <
				    nv_ssthresh_eval_min_calls)
					return;
				/* otherwise we will decrease cwnd */
			} else if (ca->nv_eval_call_cnt <
				   nv_dec_eval_min_calls) {
				if (ca->nv_allow_cwnd_growth &&
				    ca->nv_rtt_cnt > nv_stop_rtt_cnt)
					ca->nv_allow_cwnd_growth = 0;
				return;
			}

			/* We have enough data to determine we are congested */
			ca->nv_allow_cwnd_growth = 0;
			tp->snd_ssthresh =
				(nv_ssthresh_factor * max_win) >> 3;
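			/* Worked example (hypothetical numbers): with the
			 * defaults, nv_ssthresh_factor = 8 so ssthresh is set
			 * to max_win. If snd_cwnd = 100 and max_win = 80, the
			 * gap is 20, nv_cong_dec_mult = 30 * 128 / 100 = 38,
			 * and dec = (20 * 38) >> 7 = 5, so cwnd drops to 95;
			 * repeated evaluations keep shaving roughly 30% of the
			 * remaining gap until cwnd is within 2 of max_win.
			 */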
			if (tp->snd_cwnd - max_win > 2) {
				/* gap > 2, we do exponential cwnd decrease */
				int dec;

				dec = max(2U, ((tp->snd_cwnd - max_win) *
					       nv_cong_dec_mult) >> 7);
				tp->snd_cwnd -= dec;
			} else if (nv_cong_dec_mult > 0) {
				tp->snd_cwnd = max_win;
			}
			if (ca->cwnd_growth_factor > 0)
				ca->cwnd_growth_factor = 0;
			ca->nv_no_cong_cnt = 0;
		} else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
			/* There is no congestion, grow cwnd if allowed */
			if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
				return;

			ca->nv_allow_cwnd_growth = 1;
			ca->nv_no_cong_cnt++;
			if (ca->cwnd_growth_factor < 0 &&
			    nv_cwnd_growth_rate_neg > 0 &&
			    ca->nv_no_cong_cnt > nv_cwnd_growth_rate_neg) {
				ca->cwnd_growth_factor++;
				ca->nv_no_cong_cnt = 0;
			} else if (ca->cwnd_growth_factor >= 0 &&
				   nv_cwnd_growth_rate_pos > 0 &&
				   ca->nv_no_cong_cnt >
				   nv_cwnd_growth_rate_pos) {
				ca->cwnd_growth_factor++;
				ca->nv_no_cong_cnt = 0;
			}
		} else {
			/* cwnd is in-between, so do nothing */
			return;
		}

		/* update state */
		ca->nv_eval_call_cnt = 0;
		ca->nv_rtt_cnt = 0;
		ca->nv_rtt_max_rate = 0;

		/* Don't want to make cwnd < nv_min_cwnd
		 * (it wasn't below before; if it is now, it is because NV
		 * decreased it).
		 */
		if (tp->snd_cwnd < nv_min_cwnd)
			tp->snd_cwnd = nv_min_cwnd;
	}
}

/* Extract info for TCP socket info provided via netlink */
static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
			     union tcp_cc_info *info)
{
	const struct tcpnv *ca = inet_csk_ca(sk);

	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		info->vegas.tcpv_enabled = 1;
		info->vegas.tcpv_rttcnt = ca->nv_rtt_cnt;
		info->vegas.tcpv_rtt = ca->nv_last_rtt;
		info->vegas.tcpv_minrtt = ca->nv_min_rtt;

		*attr = INET_DIAG_VEGASINFO;
		return sizeof(struct tcpvegas_info);
	}
	return 0;
}

static struct tcp_congestion_ops tcpnv __read_mostly = {
	.init		= tcpnv_init,
	.ssthresh	= tcpnv_recalc_ssthresh,
	.cong_avoid	= tcpnv_cong_avoid,
	.set_state	= tcpnv_state,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.pkts_acked	= tcpnv_acked,
	.get_info	= tcpnv_get_info,

	.owner		= THIS_MODULE,
	.name		= "nv",
};

static int __init tcpnv_register(void)
{
	BUILD_BUG_ON(sizeof(struct tcpnv) > ICSK_CA_PRIV_SIZE);

	return tcp_register_congestion_control(&tcpnv);
}

static void __exit tcpnv_unregister(void)
{
	tcp_unregister_congestion_control(&tcpnv);
}

module_init(tcpnv_register);
module_exit(tcpnv_unregister);

MODULE_AUTHOR("Lawrence Brakmo");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP NV");
MODULE_VERSION("1.0");