// SPDX-License-Identifier: GPL-2.0
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:
 *		Pedro Roque	:	Fast Retransmit/Recovery.
 *					Two receive queues.
 *					Retransmit queue handled by TCP.
 *					Better retransmit timer handling.
 *					New congestion avoidance.
 *					Header prediction.
 *					Variable renaming.
 *
 *		Eric		:	Fast Retransmit.
 *		Randy Scott	:	MSS option defines.
 *		Eric Schenk	:	Fixes to slow start algorithm.
 *		Eric Schenk	:	Yet another double ACK bug.
 *		Eric Schenk	:	Delayed ACK bug fixes.
 *		Eric Schenk	:	Floyd style fast retrans war avoidance.
 *		David S. Miller	:	Don't allow zero congestion window.
 *		Eric Schenk	:	Fix retransmitter so that it sends
 *					next packet on ack of previous packet.
 *		Andi Kleen	:	Moved open_request checking here
 *					and process RSTs for open_requests.
 *		Andi Kleen	:	Better prune_queue, and other fixes.
 *		Andrey Savochkin:	Fix RTT measurements in the presence of
 *					timestamps.
 *		Andrey Savochkin:	Check sequence numbers correctly when
 *					removing SACKs due to in sequence incoming
 *					data segments.
 *		Andi Kleen:		Make sure we never ack data there is not
 *					enough room for. Also make this condition
 *					a fatal error if it might still happen.
 *		Andi Kleen:		Add tcp_measure_rcv_mss to make
 *					connections with MSS<min(MTU,ann. MSS)
 *					work without delayed acks.
 *		Andi Kleen:		Process packets with PSH set in the
 *					fast path.
 *		J Hadi Salim:		ECN support
 *		Andrei Gurtov,
 *		Pasi Sarolahti,
 *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
 *					engine. Lots of bugs were found.
 *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/kernel.h>
#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
#include <net/busy_poll.h>
#include <net/mptcp.h>
#include <trace/hooks/net.h>

int sysctl_tcp_max_orphans __read_mostly = NR_FILE;

#define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
#define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
#define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
#define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
#define FLAG_ECE		0x40 /* ECE in this ACK				*/
#define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
#define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
#define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
#define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */

#define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
#define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
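
/* Composites used throughout the ACK processing below: an ACK that
 * carries data, updates the window or acks new data (FLAG_NOT_DUP) is
 * by definition not a pure duplicate ACK, while FLAG_CA_ALERT groups
 * the signals that should wake up the congestion-control state machine
 * even when the ACK is otherwise unremarkable.
 */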

#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))

#define REXMIT_NONE	0 /* no loss recovery to do */
#define REXMIT_LOST	1 /* retransmit packets marked lost */
#define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */

#if IS_ENABLED(CONFIG_TLS_DEVICE)
static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);

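/* Register a callback invoked when ACKs advance snd_una, so that TLS
 * device offload can release data that the peer has acknowledged.  The
 * deferred static key keeps this hook out of the fast path for sockets
 * that never use it.
 */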
void clean_acked_data_enable(struct inet_connection_sock *icsk,
			     void (*cad)(struct sock *sk, u32 ack_seq))
{
	icsk->icsk_clean_acked = cad;
	static_branch_deferred_inc(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_enable);

void clean_acked_data_disable(struct inet_connection_sock *icsk)
{
	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
	icsk->icsk_clean_acked = NULL;
}
EXPORT_SYMBOL_GPL(clean_acked_data_disable);

void clean_acked_data_flush(void)
{
	static_key_deferred_flush(&clean_acked_data_enabled);
}
EXPORT_SYMBOL_GPL(clean_acked_data_flush);
#endif

#ifdef CONFIG_CGROUP_BPF
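/* Run the cgroup BPF sock_ops program so it can look at the TCP header
 * options of this incoming skb.  Only called for fully established
 * sockets, and only when the program asked to see unknown options (and
 * one is present) or asked to see every header.
 */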
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
			   BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
	struct bpf_sock_ops_kern sock_ops;

	if (likely(!unknown_opt && !parse_all_opt))
		return;

	/* The skb will be handled in the
	 * bpf_skops_established() or
	 * bpf_skops_write_hdr_opt().
	 */
	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_SYN_SENT:
	case TCP_LISTEN:
		return;
	}

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
	struct bpf_sock_ops_kern sock_ops;

	sock_owned_by_me(sk);

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = bpf_op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
	if (skb)
		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));

	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
}
#else
static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
{
}

static void bpf_skops_established(struct sock *sk, int bpf_op,
				  struct sk_buff *skb)
{
}
#endif

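/* Warn (once per boot) when a single received segment, as seen by TCP,
 * is at least as large as the device MTU; this usually points at a
 * driver with a suspect GRO implementation.
 */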
static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
			     unsigned int len)
{
	static bool __once __read_mostly;

	if (!__once) {
		struct net_device *dev;

		__once = true;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
		if (!dev || len >= dev->mtu)
			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
				dev ? dev->name : "Unknown driver");
		rcu_read_unlock();
	}
}

/* Adapt the MSS value used to make delayed ack decisions to the
 * real world.
 */
static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const unsigned int lss = icsk->icsk_ack.last_seg_size;
	unsigned int len;

	icsk->icsk_ack.last_seg_size = 0;

	/* skb->len may jitter because of SACKs, even if peer
	 * sends good full-sized frames.
	 */
	len = skb_shinfo(skb)->gso_size ? : skb->len;
	if (len >= icsk->icsk_ack.rcv_mss) {
		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
					       tcp_sk(sk)->advmss);
		/* Account for possibly-removed options */
		if (unlikely(len > icsk->icsk_ack.rcv_mss +
				   MAX_TCP_OPTION_SPACE))
			tcp_gro_dev_warn(sk, skb, len);
	} else {
		/* Otherwise, we make a more careful check, taking into
		 * account that SACK blocks are variable.
		 *
		 * "len" is the invariant segment length, including the
		 * TCP header.
		 */
		len += skb->data - skb_transport_header(skb);
		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
		    /* If PSH is not set, the packet should be
		     * full sized, provided the peer TCP is not badly broken.
		     * This observation (if it is correct 8)) allows us
		     * to handle super-low mtu links fairly.
		     */
		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
			/* Also subtract the invariant part (if the peer is
			 * RFC compliant): the TCP header plus the fixed
			 * timestamp option length.
			 * The resulting "len" is the MSS, free of SACK jitter.
			 */
			len -= tcp_sk(sk)->tcp_header_len;
			icsk->icsk_ack.last_seg_size = len;
			if (len == lss) {
				icsk->icsk_ack.rcv_mss = len;
				return;
			}
		}
		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
	}
}

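/* Grow the quick-ACK budget: roughly enough immediate ACKs to cover one
 * receive window at the usual rate of one ACK per two full-sized
 * segments, never fewer than 2 and never more than max_quickacks, and
 * never shrinking an already larger budget.
 */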
static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);

	if (quickacks == 0)
		quickacks = 2;
	quickacks = min(quickacks, max_quickacks);
	if (quickacks > icsk->icsk_ack.quick)
		icsk->icsk_ack.quick = quickacks;
}

void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_incr_quickack(sk, max_quickacks);
	inet_csk_exit_pingpong_mode(sk);
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);

/* Send ACKs quickly, if "quick" count is not exhausted
 * and the session is not interactive.
 */

static bool tcp_in_quickack_mode(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);

	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}

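/* ECN helpers (RFC 3168).  TCP_ECN_QUEUE_CWR means we have reacted to
 * an ECE from the peer and still owe a CWR bit on our next data
 * segment; TCP_ECN_DEMAND_CWR means we keep echoing ECE in our ACKs
 * until the peer acknowledges the congestion signal with CWR.
 */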
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
	if (tp->ecn_flags & TCP_ECN_OK)
		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}

static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_hdr(skb)->cwr) {
		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;

		/* If the sender is telling us it has entered CWR, then its
		 * cwnd may be very low (even just 1 packet), so we should ACK
		 * immediately.
		 */
		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
	}
}

static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
{
	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
}

static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
	case INET_ECN_NOT_ECT:
		/* Funny extension: if ECT is not set on a segment,
		 * and we have already seen ECT on a previous segment,
		 * it is probably a retransmit.
		 */
		if (tp->ecn_flags & TCP_ECN_SEEN)
			tcp_enter_quickack_mode(sk, 2);
		break;
	case INET_ECN_CE:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);

		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
			/* Better not delay acks, sender can have a very low cwnd */
			tcp_enter_quickack_mode(sk, 2);
			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
		}
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	default:
		if (tcp_ca_needs_ecn(sk))
			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
		tp->ecn_flags |= TCP_ECN_SEEN;
		break;
	}
}

static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
		__tcp_ecn_check_ce(sk, skb);
}

static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
{
	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
		tp->ecn_flags &= ~TCP_ECN_OK;
}

static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
{
	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
		return true;
	return false;
}

/* Buffer size and advertised window tuning.
 *
 * 1. Tuning sk->sk_sndbuf, when connection enters established state.
 */

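/* Rough shape of the math below: the per-segment cost is the
 * power-of-two rounded (MSS + MAX_TCP_HEADER + skb_shared_info) plus
 * the sk_buff itself, and sndbuf ends up around
 * 2 * max(snd_cwnd, reordering + 1, TCP_INIT_CWND) segments' worth of
 * that cost (unless the congestion module supplies its own factor),
 * capped by tcp_wmem[2].
 */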
static void tcp_sndbuf_expand(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	int sndmem, per_mss;
	u32 nr_segs;

	/* Worst case is non GSO/TSO: each frame consumes one skb
	 * and skb->head is kmalloc'ed using a power-of-two area of memory.
	 */
	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
		  MAX_TCP_HEADER +
		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	per_mss = roundup_pow_of_two(per_mss) +
		  SKB_DATA_ALIGN(sizeof(struct sk_buff));

	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);

	/* Fast Recovery (RFC 5681 3.2) :
	 * Cubic needs a 1.7 factor, rounded to 2 to include
	 * extra cushion (application might react slowly to EPOLLOUT)
	 */
	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
	sndmem *= nr_segs * per_mss;

	if (sk->sk_sndbuf < sndmem)
		WRITE_ONCE(sk->sk_sndbuf,
			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
 *
 * All of tcp_full_space() is split into two parts: the "network" buffer,
 * allocated forward and advertised in the receive window (tp->rcv_wnd),
 * and the "application buffer", required to isolate scheduling/application
 * latencies from the network.
 * window_clamp is the maximal advertised window. It can be less than
 * tcp_full_space(), in which case tcp_full_space() - window_clamp
 * is reserved for the "application" buffer. The smaller window_clamp is,
 * the smoother our behaviour from the viewpoint of the network, but the
 * lower the throughput and the higher the sensitivity of the connection
 * to losses. 8)
 *
 * rcv_ssthresh is a stricter window_clamp used at the "slow start"
 * phase to predict further behaviour of this connection.
 * It is used for two goals:
 *   - to enforce header prediction at the sender, even when the application
 *     requires some significant "application buffer". It is check #1.
 *   - to prevent pruning of the receive queue because of misprediction
 *     of the receive window. Check #2.
 *
 * The scheme does not work when the sender sends good segments opening
 * the window and then starts to feed us spaghetti. But it should work
 * in common situations. Otherwise, we have to rely on queue collapsing.
 */
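
/* In code terms: check #1 is the "room > 0" test in tcp_grow_window()
 * (rcv_ssthresh is still below the clamped receive space), and check #2
 * is the truesize-vs-payload test deciding whether the skb overhead is
 * small enough to justify advertising a larger window.
 */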

/* Slow part of check#2. */
static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;

		truesize >>= 1;
		window >>= 1;
	}
	return 0;
}

static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int room;

	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;

	/* Check #1 */
	if (room > 0 && !tcp_under_memory_pressure(sk)) {
		int incr;

		/* Check #2. Increase the window if an skb with this much
		 * overhead will still fit into rcvbuf in the future.
		 */
		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
			incr = 2 * tp->advmss;
		else
			incr = __tcp_grow_window(sk, skb);

		if (incr) {
			incr = max_t(int, incr, 2 * skb->len);
			tp->rcv_ssthresh += min(room, incr);
			inet_csk(sk)->icsk_ack.quick |= 1;
		}
	}
}

/* 3. Try to fixup all. It is made immediately after connection enters
 * established state.
 */
static void tcp_init_buffer_space(struct sock *sk)
{
	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tcp_mstamp_refresh(tp);
	tp->rcvq_space.time = tp->tcp_mstamp;
	tp->rcvq_space.seq = tp->copied_seq;

	maxwin = tcp_full_space(sk);

	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;

		if (tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin -
					       (maxwin >> tcp_app_win),
					       4 * tp->advmss);
	}

	/* Force reservation of one segment. */
	if (tcp_app_win &&
	    tp->window_clamp > 2 * tp->advmss &&
	    tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
				    (u32)TCP_INIT_CWND * tp->advmss);
}

/* 4. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);

	icsk->icsk_ack.quick = 0;

	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		WRITE_ONCE(sk->sk_rcvbuf,
			   min(atomic_read(&sk->sk_rmem_alloc),
			       net->ipv4.sysctl_tcp_rmem[2]));
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}

/* Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We don't have any direct information about the MSS.
 * It's better to underestimate RCV_MSS rather than overestimate it:
 * overestimations make us ACK less frequently than needed,
 * while underestimations are easier to detect and fix by
 * tcp_measure_rcv_mss().
 */
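/* In short: rcv_mss = clamp(min(advmss, mss_cache, rcv_wnd / 2),
 *			     TCP_MIN_MSS, TCP_MSS_DEFAULT).
 */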
void tcp_initialize_rcv_mss(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);

	hint = min(hint, tp->rcv_wnd / 2);
	hint = min(hint, TCP_MSS_DEFAULT);
	hint = max(hint, TCP_MIN_MSS);

	inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
EXPORT_SYMBOL(tcp_initialize_rcv_mss);

/* Receiver "autotuning" code.
 *
 * The algorithm for RTT estimation w/o timestamps is based on
 * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
 * <https://public.lanl.gov/radiant/pubs.html#DRS>
 *
 * More detail on this code can be found at
 * <http://staff.psc.edu/jheffner/>,
 * though this reference is out of date. A new paper
 * is pending.
 */
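/* rcv_rtt_est.rtt_us is stored scaled by 8, like srtt.  Without
 * win_dep this is the usual EWMA with gain 1/8 (new = 7/8 old +
 * 1/8 sample); with win_dep we only track the minimum of the
 * window-based samples.
 */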
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
	u32 new_sample = tp->rcv_rtt_est.rtt_us;
	long m = sample;

	if (new_sample != 0) {
		/* If we smoothed the samples in the non-timestamp
		 * case, we could grossly overestimate the RTT, especially
		 * with chatty applications or bulk transfer apps which
		 * are stalled on filesystem I/O.
		 *
		 * Also, since we are only going for a minimum in the
		 * non-timestamp case, we do not smooth things out,
		 * else with timestamps disabled convergence takes too
		 * long.
		 */
		if (!win_dep) {
			m -= (new_sample >> 3);
			new_sample += m;
		} else {
			m <<= 3;
			if (m < new_sample)
				new_sample = m;
		}
	} else {
		/* No previous measure. */
		new_sample = m << 3;
	}

	tp->rcv_rtt_est.rtt_us = new_sample;
}

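/* RTT measurement without timestamps (DRS): time how long it takes to
 * receive one full receive window worth of data.  A sample is taken
 * only once rcv_nxt has passed the sequence recorded a window earlier.
 */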
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
	u32 delta_us;

	if (tp->rcv_rtt_est.time == 0)
		goto new_measure;
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
		return;
	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
	if (!delta_us)
		delta_us = 1;
	tcp_rcv_rtt_update(tp, delta_us, 1);

new_measure:
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
	tp->rcv_rtt_est.time = tp->tcp_mstamp;
}

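/* RTT measurement from timestamp echoes: only echoes carried on
 * segments of at least rcv_mss bytes are used, and at most one sample
 * is taken per distinct echoed timestamp value.
 */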
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
					  const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
		return;
	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;

	if (TCP_SKB_CB(skb)->end_seq -
	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
		u32 delta_us;

		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
			if (!delta)
				delta = 1;
			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
			tcp_rcv_rtt_update(tp, delta_us, 0);
		}
	}
}

/*
 * This function should be called every time data is copied to user space.
 * It calculates the appropriate TCP receive buffer space.
 */
void tcp_rcv_space_adjust(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 copied;
	int time;

	trace_tcp_rcv_space_adjust(sk);

	tcp_mstamp_refresh(tp);
	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
		return;

	/* Number of bytes copied to user in last RTT */
	copied = tp->copied_seq - tp->rcvq_space.seq;
	if (copied <= tp->rcvq_space.space)
		goto new_measure;

	/* A bit of theory :
	 * copied = bytes received in previous RTT, our base window
	 * To cope with packet losses, we need a 2x factor
	 * To cope with slow start, and sender growing its cwin by 100 %
	 * every RTT, we need a 4x factor, because the ACK we are sending
	 * now is for the next RTT, not the current one :
	 * <prev RTT . ><current RTT .. ><next RTT .... >
	 */
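	/* Worked example (illustrative numbers): if copied = 1 MB in the
	 * last RTT while rcvq_space.space was 512 KB, then rcvwin starts
	 * at 2 MB + 16 * advmss, grow equals rcvwin (the measurement
	 * doubled), and rcvwin += 2 * grow triples the target before it
	 * is converted to an rcvbuf size via the per-segment truesize
	 * overhead below.
	 */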

	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
		int rcvmem, rcvbuf;
		u64 rcvwin, grow;

		/* Minimal window to cope with packet losses, assuming
		 * steady state. Add some cushion because of small variations.
		 */
		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;

		/* Accommodate for sender rate increase (eg. slow start) */
		grow = rcvwin * (copied - tp->rcvq_space.space);
		do_div(grow, tp->rcvq_space.space);
		rcvwin += (grow << 1);

		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
			rcvmem += 128;

		do_div(rcvwin, tp->advmss);
		rcvbuf = min_t(u64, rcvwin * rcvmem,
			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
		if (rcvbuf > sk->sk_rcvbuf) {
			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

			/* Make the window clamp follow along. */
			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
		}
	}
	tp->rcvq_space.space = copied;

new_measure:
	tp->rcvq_space.seq = tp->copied_seq;
	tp->rcvq_space.time = tp->tcp_mstamp;
}

/* There is something you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCPs do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACKs, the
 * sender will sit on its end and only queue most of its data, because
 * it can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, it increments snd_cwnd and transmits more of its
 * queue. -DaveM
 */
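/* The resulting ato behaviour, roughly: start at TCP_ATO_MIN; packets
 * arriving within TCP_ATO_MIN/2 of the previous one decay ato back
 * towards TCP_ATO_MIN; larger gaps that are still below the current ato
 * are blended in (ato = ato/2 + gap, capped at the RTO); and a gap
 * beyond the RTO re-enters quickack mode entirely.
 */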
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) u32 now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) tcp_measure_rcv_mss(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) tcp_rcv_rtt_measure(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) now = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) if (!icsk->icsk_ack.ato) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) /* The _first_ data packet received, initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * delayed ACK engine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) icsk->icsk_ack.ato = TCP_ATO_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) int m = now - icsk->icsk_ack.lrcvtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) if (m <= TCP_ATO_MIN / 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) /* The fastest case is the first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) } else if (m < icsk->icsk_ack.ato) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) if (icsk->icsk_ack.ato > icsk->icsk_rto)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) icsk->icsk_ack.ato = icsk->icsk_rto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) } else if (m > icsk->icsk_rto) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) /* The gap is too long. Apparently the sender failed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * restart its window, so send ACKs quickly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) icsk->icsk_ack.lrcvtime = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) tcp_ecn_check_ce(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) if (skb->len >= 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) }
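
/* The branches above implement a simple inter-arrival filter for the delayed
 * ACK timeout: very fast arrivals pull ato toward TCP_ATO_MIN, moderate gaps
 * are averaged in (never above the RTO), and a gap longer than the RTO drops
 * back into quickack mode.  A minimal stand-alone rendition of that update,
 * with all values in the same time unit and the quickack switch returned via
 * a flag (names are illustrative, not kernel API):
 *
 *	static unsigned int ato_update(unsigned int ato, unsigned int m,
 *				       unsigned int ato_min, unsigned int rto,
 *				       int *quickack)
 *	{
 *		*quickack = 0;
 *		if (m <= ato_min / 2) {
 *			ato = (ato >> 1) + ato_min / 2;	// fastest case
 *		} else if (m < ato) {
 *			ato = (ato >> 1) + m;		// average the gap in
 *			if (ato > rto)
 *				ato = rto;
 *		} else if (m > rto) {
 *			*quickack = 1;	// gap too long: ACK quickly again
 *		}
 *		return ato;
 *	}
 */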
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) /* Called to compute a smoothed rtt estimate. The data fed to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) * routine either comes from timestamps, or from segments that were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * known _not_ to have been retransmitted [see Karn/Partridge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * piece by Van Jacobson.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * NOTE: the next three routines used to be one big routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) * To save cycles in the RFC 1323 implementation it was better to break
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) * it up into three procedures. -- erics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) long m = mrtt_us; /* RTT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) u32 srtt = tp->srtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) /* The following amusing code comes from Jacobson's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) * article in SIGCOMM '88. Note that rtt and mdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) * are scaled versions of rtt and mean deviation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) * This is designed to be as fast as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * m stands for "measurement".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * In a 1990 paper the RTO value was changed to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) * RTO = rtt + 4 * mdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * Funny. This algorithm seems to be very broken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * These formulae increase RTO when it should be decreased, increase it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) * too slowly when it should be increased quickly, decrease it too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) * quickly, etc. I guess in BSD RTO takes ONE value, so it absolutely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) * does not matter how it is _calculated_. It seems that was a trap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) * that VJ failed to avoid. 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (srtt != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) m -= (srtt >> 3); /* m is now error in rtt est */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) srtt += m; /* rtt = 7/8 rtt + 1/8 new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (m < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) m = -m; /* m is now abs(error) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) m -= (tp->mdev_us >> 2); /* similar update on mdev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) /* This is similar to one of Eifel findings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) * Eifel blocks mdev updates when rtt decreases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) * This solution is a bit different: we use finer gain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) * for mdev in this case (alpha*beta).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * Like Eifel it also prevents growth of rto,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) * but also it limits too fast rto decreases,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * happening in pure Eifel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) if (m > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) m >>= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) m -= (tp->mdev_us >> 2); /* similar update on mdev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) tp->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) if (tp->mdev_us > tp->mdev_max_us) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) tp->mdev_max_us = tp->mdev_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) if (tp->mdev_max_us > tp->rttvar_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) tp->rttvar_us = tp->mdev_max_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (after(tp->snd_una, tp->rtt_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (tp->mdev_max_us < tp->rttvar_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) tp->rtt_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) tp->mdev_max_us = tcp_rto_min_us(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) tcp_bpf_rtt(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) /* no previous measure. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) srtt = m << 3; /* take the measured time to be rtt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) tp->mdev_us = m << 1; /* make sure rto = 3*rtt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) tp->mdev_max_us = tp->rttvar_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) tp->rtt_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) tcp_bpf_rtt(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) tp->srtt_us = max(1U, srtt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
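
/* For reference, stripping the fixed-point scaling: srtt_us holds 8*SRTT and
 * mdev_us holds 4*MDEV, so the code above is the classic Jacobson pair
 * SRTT = 7/8*SRTT + 1/8*m and MDEV = 3/4*MDEV + 1/4*|m - SRTT|, plus the
 * Eifel-style reduced gain when the sample drops well below SRTT and the
 * rttvar/mdev_max windowing.  A plain floating-point sketch of just the two
 * EWMAs (ignoring those refinements; not kernel code):
 *
 *	struct rtt_est { double srtt, mdev; };
 *
 *	static void rtt_sample(struct rtt_est *e, double m)
 *	{
 *		double err, aerr;
 *
 *		if (e->srtt == 0) {		// no previous measure
 *			e->srtt = m;
 *			e->mdev = m / 2;
 *			return;
 *		}
 *		err = m - e->srtt;
 *		aerr = err < 0 ? -err : err;
 *		e->srtt += err / 8;		// srtt = 7/8 srtt + 1/8 new
 *		e->mdev += (aerr - e->mdev) / 4; // mdev = 3/4 mdev + 1/4 new
 *	}
 */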
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) static void tcp_update_pacing_rate(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) u64 rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) /* current rate is (cwnd * mss) / srtt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) * In Slow Start [1], set sk_pacing_rate to 200 % of the current rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * In the Congestion Avoidance phase, set it to 120 % of the current rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) * [1] : The normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * end of slow start and should slow down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) if (tp->snd_cwnd < tp->snd_ssthresh / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) rate *= max(tp->snd_cwnd, tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if (likely(tp->srtt_us))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) do_div(rate, tp->srtt_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) * without any lock. We want to make sure the compiler won't store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) * intermediate values in this location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) sk->sk_max_pacing_rate));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) }
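
/* Unit check for the expression above: srtt_us stores 8*SRTT in usec, so the
 * ((USEC_PER_SEC / 100) << 3) factor both cancels that scaling and turns the
 * sysctl ratio (a percentage: 200 in slow start, 120 in congestion avoidance,
 * per the comment above) into a plain factor, leaving bytes per second:
 *
 *	rate = ratio/100 * mss * max(cwnd, packets_out) / (SRTT in seconds)
 *
 * The same thing with unscaled inputs (a sketch, not the kernel helper):
 *
 *	static double pacing_rate(unsigned int mss, unsigned int cwnd,
 *				  double srtt_sec, unsigned int ratio_pct)
 *	{
 *		// caller passes max(cwnd, packets_out) as cwnd
 *		return (double)ratio_pct / 100.0 * mss * cwnd / srtt_sec;
 *	}
 */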
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) /* Calculate rto without backoff. This is the second half of Van Jacobson's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) * routine referred to above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) static void tcp_set_rto(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) /* Old crap is replaced with new one. 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) * More seriously:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) * 1. If the rtt variance happened to be less than 50 msec, it is a hallucination.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) * It cannot be less due to utterly erratic ACK generation made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) * at least by solaris and freebsd. "Erratic ACKs" have _nothing_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) * to do with delayed acks, because at cwnd>2 true delack timeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * is invisible. Actually, Linux-2.4 also generates erratic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * ACKs in some circumstances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /* 2. Fixups made earlier cannot be right.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * If we cannot estimate RTO correctly without them,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * all the algo is pure shit and should be replaced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * with a correct one, which is exactly what we pretend to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * guarantees that rto is higher.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) tcp_bound_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) }
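
/* __tcp_set_rto() lives outside this file; with the scaling used by
 * tcp_rtt_estimator() above (srtt_us = 8*SRTT, rttvar_us roughly 4*RTTVAR)
 * it boils down to the familiar RTO = SRTT + 4*RTTVAR shape of RFC 6298,
 * and tcp_bound_rto() then clamps the result to the TCP_RTO_MAX ceiling.
 * In unscaled form (a sketch under those assumptions, not the kernel helper):
 *
 *	static double rto_estimate(double srtt, double rttvar, double rto_max)
 *	{
 *		double rto = srtt + 4.0 * rttvar;
 *
 *		return rto > rto_max ? rto_max : rto;
 *	}
 */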
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if (!cwnd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) cwnd = TCP_INIT_CWND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) struct tcp_sacktag_state {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) /* Timestamps for earliest and latest never-retransmitted segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) * that was SACKed. RTO needs the earliest RTT to stay conservative,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) * but congestion control should still get an accurate delay signal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) u64 first_sackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) u64 last_sackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) u32 reord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) u32 sack_delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) int flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) unsigned int mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) struct rate_sample *rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /* Take note that the peer is sending D-SACKs. Skip updating the data delivery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * and spurious retransmission information if this DSACK was unlikely caused by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * the sender's action:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * - the DSACKed sequence range is larger than the maximum receiver's window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * - the total no. of DSACKed segments exceeds the total no. of retransmitted segs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) u32 end_seq, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) u32 seq_len, dup_segs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) if (!before(start_seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) seq_len = end_seq - start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (seq_len > tp->max_window)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (seq_len > tp->mss_cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) tp->dsack_dups += dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) /* Skip the DSACK if dup segs weren't retransmitted by sender */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (tp->dsack_dups > tp->total_retrans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) tp->rack.dsack_seen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) state->flag |= FLAG_DSACKING_ACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) /* A spurious retransmission is delivered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) state->sack_delivered += dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) return dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
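
/* The function above boils down to: how many MSS-sized duplicates does this
 * D-SACK block plausibly cover, and is that consistent with what we actually
 * retransmitted?  The same logic as a stand-alone helper, without the counter
 * side effects (names are illustrative):
 *
 *	static unsigned int dsack_dup_segs(unsigned int seq_len, unsigned int mss,
 *					   unsigned int max_window,
 *					   unsigned long dsack_dups,
 *					   unsigned long total_retrans)
 *	{
 *		unsigned int dup_segs = 1;
 *
 *		if (seq_len == 0 || seq_len > max_window)
 *			return 0;	// dubious: wider than the window ever was
 *		if (seq_len > mss)
 *			dup_segs = (seq_len + mss - 1) / mss;	// DIV_ROUND_UP
 *		if (dsack_dups + dup_segs > total_retrans)
 *			return 0;	// more dups than retransmitted segments
 *		return dup_segs;
 *	}
 */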
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) /* It's reordering when a higher sequence was delivered (i.e. sacked) before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * some lower, never-retransmitted sequence ("low_seq"). The maximum reordering
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * distance is approximated as a distance in full-MSS packets ("reordering").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) const int ts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) const u32 mss = tp->mss_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) u32 fack, metric;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) fack = tcp_highest_sack_seq(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) if (!before(low_seq, fack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) metric = fack - low_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) if ((metric > tp->reordering * mss) && mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) #if FASTRETRANS_DEBUG > 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) tp->reordering,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) tp->sacked_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) tp->undo_marker ? tp->undo_retrans : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) tp->reordering = min_t(u32, (metric + mss - 1) / mss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) /* This exciting event is worth remembering. 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) tp->reord_seen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
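
/* The reordering degree is just the SACK-observed displacement converted to
 * full-MSS packets and clamped by the tcp_max_reordering sysctl.  In
 * isolation (a sketch that ignores sequence wrap):
 *
 *	static unsigned int update_reordering(unsigned int reordering,
 *					      unsigned int fack, unsigned int low_seq,
 *					      unsigned int mss, unsigned int max_reord)
 *	{
 *		unsigned int metric = fack - low_seq;	// displaced bytes
 *
 *		if (mss && metric > reordering * mss) {
 *			reordering = (metric + mss - 1) / mss;
 *			if (reordering > max_reord)
 *				reordering = max_reord;
 *		}
 *		return reordering;
 *	}
 */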
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* This must be called before lost_out or retrans_out are updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * on a new loss, because we want to know if all skbs previously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * known to be lost have already been retransmitted, indicating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * that this newly lost skb is our next skb to retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) (tp->retransmit_skb_hint &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) before(TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) tp->retransmit_skb_hint = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) /* Sum the number of packets on the wire we have marked as lost, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) * notify the congestion control module that the given skb was marked lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) tp->lost += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) __u8 sacked = TCP_SKB_CB(skb)->sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) tcp_verify_retransmit_hint(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) if (sacked & TCPCB_SACKED_RETRANS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) /* Account for retransmits that are lost again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) tp->retrans_out -= tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) tcp_skb_pcount(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) tcp_notify_skb_loss_event(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) tp->lost_out += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) tcp_notify_skb_loss_event(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) /* Updates the delivered and delivered_ce counts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) tp->delivered += delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) tp->delivered_ce += delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) /* This procedure tags the retransmission queue when SACKs arrive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * Packets in queue with these bits set are counted in variables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * sacked_out, retrans_out and lost_out, correspondingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * Valid combinations are:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * Tag InFlight Description
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) * 0 1 - orig segment is in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * S 0 - nothing flies, orig reached receiver.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * L 0 - nothing flies, orig lost by net.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * R 2 - both orig and retransmit are in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * L|R 1 - orig is lost, retransmit is in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * S|R 1 - orig reached receiver, retrans is still in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * (L|S|R is logically valid, it could occur when L|R is sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * but it is equivalent to plain S and the code short-circuits it to S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) * L|S is logically invalid, it would mean -1 packet in flight 8))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) * These 6 states form finite state machine, controlled by the following events:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) * 3. Loss detection event of two flavors:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) * A. Scoreboard estimator decided the packet is lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) * A'. Reno "three dupacks" marks head of queue lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * B. SACK arrives sacking SND.NXT at the moment, when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * segment was retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * 4. D-SACK added new rule: D-SACK changes any tag to S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * It is pleasant to note that the state diagram turns out to be commutative,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * so that we are allowed not to be bothered by the order of our actions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * when multiple events arrive simultaneously (see the function below).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * Reordering detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) * --------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) * The reordering metric is the maximal distance by which a packet can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) * displaced in the packet stream. With SACKs we can estimate it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) * 1. A SACK fills an old hole and the corresponding segment was never
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * retransmitted -> reordering. Alas, we cannot use it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * when the segment was retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) * 2. The last flaw is solved with D-SACK. A D-SACK arrives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) * for a retransmitted and already-SACKed segment -> reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) * Both of these heuristics are not used in Loss state, when we cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * account for retransmits accurately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) * SACK block validation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * ----------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * SACK block range validation checks that the received SACK block fits to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * Note that SND.UNA is not included in the range even though it would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * valid, because it means that the receiver is rather inconsistent with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * itself: it reports SACK reneging when it should advance SND.UNA. Such a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * SACK block is perfectly valid, however, in light of RFC 2018, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * explicitly states that "SACK block MUST reflect the newest segment. Even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * if the newest segment is going to be discarded ...", not that it looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * very clever in the case of the head skb. Due to potential receiver-driven
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * attacks, we choose to avoid immediately walking the write queue on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * reneging and defer the head skb's loss recovery to the standard loss
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) * recovery procedure that will eventually trigger (nothing forbids us doing this).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) * This also blocks start_seq wrap-around. The problem lies in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * fact that though start_seq (s) is before end_seq (i.e., not reversed),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * there's no guarantee that it will be before snd_nxt (n). The problem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) * wrap (s_w):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * <- outs wnd -> <- wrapzone ->
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) * u e n u_w e_w s n_w
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) * | | | | | | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) * |<------------+------+----- TCP seqno space --------------+---------->|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) * ...-- <2^31 ->| |<--------...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * ...---- >2^31 ------>| |<--------...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * The current code wouldn't be vulnerable, but it's still better to discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * such crazy SACK blocks. Doing this check for start_seq alone also closes a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * somewhat similar case (end_seq after the snd_nxt wrap), since the earlier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) * reversed check in the snd_nxt wrap -> snd_una region then becomes "well
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * defined", i.e., equal to the ideal case (an infinite seqno space without wrap-caused issues).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) * With D-SACK the lower bound is extended to cover sequence space below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) * SND.UNA down to undo_marker, which is the last point of interest. Yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) * again, a D-SACK block must not go across snd_una (for the same reason as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * for the normal SACK blocks, explained above). But there all simplicity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * ends: TCP might receive valid D-SACKs below that. As long as they reside
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * fully below undo_marker they do not affect behavior in any way and can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) * therefore be safely ignored. In rare cases (which are more or less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * theoretical ones), the D-SACK will nicely cross that boundary due to skb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * fragmentation and packet reordering past skb's retransmission. To consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * them correctly, the acceptable range must be extended even more, though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * the exact amount is rather hard to quantify. However, tp->max_window can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * be used as an exaggerated estimate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) u32 start_seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) /* Too far in future, or reversed (interpretation is ambiguous) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) /* Nasty start_seq wrap-around check (see comments above) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) if (!before(start_seq, tp->snd_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) /* In outstanding window? ...This is valid exit for D-SACKs too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * start_seq == snd_una is nonsensical (see comments above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) if (after(start_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) if (!is_dsack || !tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) /* ...Then it's D-SACK, and must reside below snd_una completely */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) if (after(end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (!before(start_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) /* Too old */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) if (!after(end_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) /* Undo_marker boundary crossing (overestimates a lot). Known already:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * start_seq < undo_marker and end_seq >= undo_marker.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) return !before(start_seq, end_seq - tp->max_window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) }
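
/* All of the before()/after() tests above are the usual modulo-2^32 sequence
 * comparisons: a signed comparison of the 32-bit difference.  A stand-alone
 * rendition matching those semantics (not copied from the kernel headers):
 *
 *	#include <stdint.h>
 *	#include <stdbool.h>
 *
 *	static bool seq_before(uint32_t a, uint32_t b)
 *	{
 *		return (int32_t)(a - b) < 0;
 *	}
 *
 *	static bool seq_after(uint32_t a, uint32_t b)
 *	{
 *		return seq_before(b, a);
 *	}
 *
 * e.g. seq_before(0xfffffff0u, 0x10u) is true: 0x10 lies just past the wrap.
 */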
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) struct tcp_sack_block_wire *sp, int num_sacks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) u32 prior_snd_una, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) u32 dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) } else if (num_sacks > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) if (!dup_segs) { /* Skip dubious DSACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) /* D-SACK for already forgotten data... Do dumb counting. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) if (tp->undo_marker && tp->undo_retrans > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) !after(end_seq_0, prior_snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) after(end_seq_0, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
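
/* Per RFC 2883 the first SACK block announces a D-SACK either when it
 * reports data below the cumulative ACK or when it is fully contained in
 * the second block; the two branches above test exactly that.  As a bare
 * predicate, reusing the seq_before()/seq_after() helpers sketched earlier:
 *
 *	static bool is_dsack(uint32_t ack_seq, uint32_t s0, uint32_t e0,
 *			     int num_sacks, uint32_t s1, uint32_t e1)
 *	{
 *		if (seq_before(s0, ack_seq))
 *			return true;	// below the cumulative ACK
 *		if (num_sacks > 1 &&
 *		    !seq_after(e0, e1) && !seq_before(s0, s1))
 *			return true;	// block 0 contained in block 1
 *		return false;
 *	}
 */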
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) /* Check if the skb is fully within the SACK block. In the presence of GSO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * skbs, the incoming SACK may not match exactly, but we can find a smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) * MSS-aligned portion of it that matches. Therefore we might need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) * fragment, which may fail and create some hassle (the caller must handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) * the error-case returns).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * FIXME: this could be merged to shift decision code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) u32 start_seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) bool in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) unsigned int pkt_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) unsigned int mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) !before(end_seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) if (tcp_skb_pcount(skb) > 1 && !in_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) mss = tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) if (pkt_len < mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) pkt_len = mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (pkt_len < mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) /* Round if necessary so that SACKs cover only full MSSes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) * and/or the remaining small portion (if present)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) if (pkt_len > mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) unsigned int new_len = (pkt_len / mss) * mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) if (!in_sack && new_len < pkt_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) new_len += mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) pkt_len = new_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) if (pkt_len >= skb->len && !in_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) pkt_len, mss, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) return in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) }
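
/* The rounding above keeps the fragmentation point on an MSS boundary, so
 * that (as the comment in the function says) SACKs end up covering only full
 * MSSes plus at most one small tail: round the split point down when the
 * head of the skb is the SACKed part, up otherwise.  In isolation:
 *
 *	static unsigned int sack_split_len(unsigned int pkt_len,
 *					   unsigned int mss, int head_in_sack)
 *	{
 *		if (pkt_len > mss) {
 *			unsigned int new_len = (pkt_len / mss) * mss;
 *
 *			if (!head_in_sack && new_len < pkt_len)
 *				new_len += mss;		// round up
 *			pkt_len = new_len;
 *		}
 *		return pkt_len;
 *	}
 */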
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) /* Mark the given newly-SACKed range as such, adjusting counters and hints. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) static u8 tcp_sacktag_one(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) struct tcp_sacktag_state *state, u8 sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) int dup_sack, int pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) u64 xmit_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) /* Account D-SACK for retransmitted packet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (dup_sack && (sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) if (tp->undo_marker && tp->undo_retrans > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) after(end_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if ((sacked & TCPCB_SACKED_ACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) before(start_seq, state->reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) state->reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) /* Nothing to do; acked frame is about to be dropped (was ACKed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) if (!after(end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) return sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) if (!(sacked & TCPCB_SACKED_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) tcp_rack_advance(tp, sacked, end_seq, xmit_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) if (sacked & TCPCB_SACKED_RETRANS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) /* If the segment is not tagged as lost,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) * we do not clear RETRANS, believing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) * that retransmission is still in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) tp->lost_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) tp->retrans_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) if (!(sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) /* New SACK for a never-retransmitted frame
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * which was in a hole. This is reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (before(start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) tcp_highest_sack_seq(tp)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) before(start_seq, state->reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) state->reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (!after(end_seq, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) state->flag |= FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) if (state->first_sackt == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) state->first_sackt = xmit_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) state->last_sackt = xmit_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) sacked &= ~TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) tp->lost_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) sacked |= TCPCB_SACKED_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) state->flag |= FLAG_DATA_SACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) tp->sacked_out += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) /* Out-of-order packets delivered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) state->sack_delivered += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) if (tp->lost_skb_hint &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) tp->lost_cnt_hint += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /* D-SACK. We can detect redundant retransmission in S|R and plain R
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * frames and clear it. undo_retrans is decreased above, L|R frames
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * are accounted above as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) sacked &= ~TCPCB_SACKED_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) tp->retrans_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) return sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) /* Shift newly-SACKed bytes from this skb to the immediately previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) unsigned int pcount, int shifted, int mss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) bool dup_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) BUG_ON(!pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) /* Adjust counters and hints for the newly sacked sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) * range but discard the return value since prev is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) * marked. We must tag the range first because the seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) * advancement below implicitly advances
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) * tcp_highest_sack_seq() when skb is highest_sack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) start_seq, end_seq, dup_sack, pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) tcp_rate_skb_delivered(sk, skb, state->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) if (skb == tp->lost_skb_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) tp->lost_cnt_hint += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) TCP_SKB_CB(prev)->end_seq += shifted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) TCP_SKB_CB(skb)->seq += shifted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) tcp_skb_pcount_add(prev, pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) tcp_skb_pcount_add(skb, -pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) /* When we're adding to gso_segs == 1, gso_size will be zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * In theory this shouldn't be necessary, but as long as DSACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * code can come after this skb later on, it's better to keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * setting gso_size to something.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) if (!TCP_SKB_CB(prev)->tcp_gso_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) TCP_SKB_CB(prev)->tcp_gso_size = mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /* CHECKME: To clear or not to clear? Mimics normal skb currently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) if (tcp_skb_pcount(skb) <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) TCP_SKB_CB(skb)->tcp_gso_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) /* Difference in this won't matter, both ACKed by the same cumul. ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) if (skb->len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) BUG_ON(!tcp_skb_pcount(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) /* Whole SKB was eaten :-) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) if (skb == tp->retransmit_skb_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) tp->retransmit_skb_hint = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) if (skb == tp->lost_skb_hint) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) tp->lost_skb_hint = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) tp->lost_cnt_hint -= tcp_skb_pcount(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) TCP_SKB_CB(prev)->end_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) if (skb == tcp_highest_sack(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) tcp_advance_highest_sack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) tcp_skb_collapse_tstamp(prev, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) tcp_rtx_queue_unlink_and_free(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) /* I wish gso_size had a somewhat saner initialization than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) * something-or-zero, which complicates things.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) static int tcp_skb_seglen(const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) }
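
/* An illustrative example of the "something-or-zero" handling above, with
 * assumed packet sizes: a single-segment skb of 1448 bytes may carry
 * tcp_gso_size == 0, so tcp_skb_seglen() falls back to skb->len == 1448,
 * while a 2-segment skb of 2896 bytes has tcp_gso_size == 1448 and
 * tcp_skb_seglen() returns tcp_skb_mss() == 1448 rather than 2896.
 */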
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) /* Shifting pages past head area doesn't work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) static int skb_can_shift(const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) return !skb_headlen(skb) && skb_is_nonlinear(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) int pcount, int shiftlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) * to make sure we do not store more than 65535 * 8 bytes per skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * even if the current MSS is bigger.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) return skb_shift(to, from, shiftlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) }
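
/* Rough numbers behind the limits above (illustrative only): with
 * TCP_MIN_GSO_SIZE == 8 the byte cap is 65535 * 8 == 524280 bytes;
 * staying below it guarantees the segment count fits in the 16-bit
 * tcp_gso_segs field even at the minimum gso_size. With a more typical
 * mss of 1448 those 524280 bytes are only ~362 segments, so the check is
 * deliberately conservative.
 */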
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) /* Try collapsing SACK blocks spanning multiple skbs into a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) * skb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) bool dup_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) struct sk_buff *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) int mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) int pcount = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) int in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) /* Normally R (retrans) without L (lost) won't result in plain S (sacked) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) if (!dup_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) if (!skb_can_shift(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) /* This frame is about to be dropped (was ACKed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) /* Can only happen with delayed DSACK + discard craziness */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) prev = skb_rb_prev(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) if (!prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) if (!tcp_skb_can_collapse(prev, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) !before(end_seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) if (in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) mss = tcp_skb_seglen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) /* TODO: Fix DSACKs to not fragment already-SACKed data; then we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) * drop this restriction as unnecessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (mss != tcp_skb_seglen(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) /* CHECKME: Is this the non-MSS split case only? Btw, this will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * cause skipped skbs due to the advancing loop; the original
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * has that behaviour too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (tcp_skb_pcount(skb) <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (!in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) /* TODO: head merge to next could be attempted here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) * though it might not be worth the additional hassle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) * ...we can probably just fall back to what was done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * previously. We could try merging non-SACKed ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * as well, but it probably isn't going to pay off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * because later SACKs might again split them, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * it would make skb timestamp tracking a considerably
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * harder problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) len = end_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) BUG_ON(len > skb->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) /* MSS boundaries should be honoured or else pcount will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * severely break, even though it makes things a bit trickier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * Optimize the common case to avoid most of the divides.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) mss = tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) /* TODO: Fix DSACKs to not fragment already-SACKed data; then we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) * drop this restriction as unnecessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) if (mss != tcp_skb_seglen(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) if (len == mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) pcount = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) } else if (len < mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) pcount = len / mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) len = pcount * mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) /* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) if (!tcp_skb_shift(prev, skb, pcount, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) /* Filling the hole allows collapsing with the next skb as well; this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * very useful when a hole-on-every-nth-skb pattern happens
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) skb = skb_rb_next(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) if (!skb_can_shift(skb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) (mss != tcp_skb_seglen(skb)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) if (!tcp_skb_can_collapse(prev, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) if (tcp_skb_shift(prev, skb, pcount, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) tcp_shifted_skb(sk, prev, skb, state, pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) len, mss, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) noop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) fallback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
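
/* Return value summary for tcp_shift_skb_data(), as consumed by
 * tcp_sacktag_walk() below: returning prev means data was shifted and the
 * walk continues with prev as the current node; returning the original skb
 * means nothing was shifted but the skb is still intact; returning NULL
 * means shifting was not possible and the caller falls back to plain
 * tcp_match_skb_to_sack() matching.
 */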
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) struct tcp_sack_block *next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) bool dup_sack_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) struct sk_buff *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) int in_sack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) bool dup_sack = dup_sack_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) /* queue is in-order => we can short-circuit the walk early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) if (!before(TCP_SKB_CB(skb)->seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) if (next_dup &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) in_sack = tcp_match_skb_to_sack(sk, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) next_dup->start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) next_dup->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) if (in_sack > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) dup_sack = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) /* The skb reference here is a bit tricky to get right, since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * shifting can eat and free both this skb and the next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * so not even the _safe variant of the loop is enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) if (in_sack <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) tmp = tcp_shift_skb_data(sk, skb, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) start_seq, end_seq, dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) if (tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) if (tmp != skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) skb = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) in_sack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) in_sack = tcp_match_skb_to_sack(sk, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) if (unlikely(in_sack < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) if (in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) TCP_SKB_CB(skb)->sacked =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) tcp_sacktag_one(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) TCP_SKB_CB(skb)->sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) TCP_SKB_CB(skb)->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) dup_sack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) tcp_skb_pcount(skb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) tcp_rate_skb_delivered(sk, skb, state->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) list_del_init(&skb->tcp_tsorted_anchor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) if (!before(TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) tcp_highest_sack_seq(tp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) tcp_advance_highest_sack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) skb = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (before(seq, TCP_SKB_CB(skb)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) }
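
/* An illustrative lookup with assumed sequence numbers: if the rtx queue
 * holds skbs covering [1000,2000), [2000,3000) and [3000,4000), then
 * tcp_sacktag_bsearch(sk, 2500) walks down the rbtree and returns the
 * second skb, since 2500 is neither before its seq (2000) nor at/after its
 * end_seq (3000); a seq beyond the last skb (e.g. 4500) returns NULL.
 */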
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) u32 skip_to_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) return tcp_sacktag_bsearch(sk, skip_to_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) struct tcp_sack_block *next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) u32 skip_to_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (!next_dup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (before(next_dup->start_seq, skip_to_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) skb = tcp_sacktag_walk(skb, sk, NULL, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) next_dup->start_seq, next_dup->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) u32 prior_snd_una, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) const unsigned char *ptr = (skb_transport_header(ack_skb) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) TCP_SKB_CB(ack_skb)->sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) struct tcp_sack_block sp[TCP_NUM_SACKS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) struct tcp_sack_block *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) int used_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) bool found_dup_sack = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) int first_sack_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) state->flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) state->reord = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) if (!tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) tcp_highest_sack_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) num_sacks, prior_snd_una, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) /* Eliminate too old ACKs, but take into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) * account more or less fresh ones; they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) * contain valid SACK info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) if (!tp->packets_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) used_sacks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) first_sack_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) for (i = 0; i < num_sacks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) bool dup_sack = !i && found_dup_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (!tcp_is_sackblock_valid(tp, dup_sack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) sp[used_sacks].start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) sp[used_sacks].end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) if (dup_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) if (!tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) /* Don't count old SACK blocks caused by ACK reordering */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) !after(sp[used_sacks].end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) mib_idx = LINUX_MIB_TCPSACKDISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) if (i == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) first_sack_index = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) /* Ignore very old stuff early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) if (i == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) first_sack_index = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) used_sacks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) /* Order SACK blocks to allow an in-order walk of the retrans queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) for (i = used_sacks - 1; i > 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) for (j = 0; j < i; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) swap(sp[j], sp[j + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) /* Track where the first SACK block goes to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) if (j == first_sack_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) first_sack_index = j + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) state->mss_now = tcp_current_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) if (!tp->sacked_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) /* It's already past, so skip checking against it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) cache = tp->recv_sack_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) /* Skip empty blocks at the head of the cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) !cache->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) while (i < used_sacks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) u32 start_seq = sp[i].start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) u32 end_seq = sp[i].end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) bool dup_sack = (found_dup_sack && (i == first_sack_index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) struct tcp_sack_block *next_dup = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) if (found_dup_sack && ((i + 1) == first_sack_index))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) next_dup = &sp[i + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) /* Skip too early cached blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) while (tcp_sack_cache_ok(tp, cache) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) !before(start_seq, cache->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) /* Can we skip some work by looking at recv_sack_cache? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) after(end_seq, cache->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) /* Head todo? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) if (before(start_seq, cache->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) skb = tcp_sacktag_skip(skb, sk, start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) skb = tcp_sacktag_walk(skb, sk, next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) cache->start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) /* Rest of the block already fully processed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) if (!after(end_seq, cache->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) goto advance_sp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) cache->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) /* ...tail remains todo... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (tcp_highest_sack_seq(tp) == cache->end_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) /* ...but better entrypoint exists! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) skb = tcp_highest_sack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) goto walk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) /* Check overlap against next cached too (past this one already) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) if (!before(start_seq, tcp_highest_sack_seq(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) skb = tcp_highest_sack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) skb = tcp_sacktag_skip(skb, sk, start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) walk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) skb = tcp_sacktag_walk(skb, sk, next_dup, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) start_seq, end_seq, dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) advance_sp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /* Clear the head of the cache sack blocks so we can skip it next time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) tp->recv_sack_cache[i].start_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) tp->recv_sack_cache[i].end_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) for (j = 0; j < used_sacks; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) tp->recv_sack_cache[i++] = sp[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) tcp_check_sack_reordering(sk, state->reord, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) #if FASTRETRANS_DEBUG > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) WARN_ON((int)tp->sacked_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) WARN_ON((int)tp->lost_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) WARN_ON((int)tp->retrans_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) WARN_ON((int)tcp_packets_in_flight(tp) < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) return state->flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) /* Limits sacked_out so that sum with lost_out isn't ever larger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) * packets_out. Returns false if sacked_out adjustment wasn't necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) u32 holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) holes = max(tp->lost_out, 1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) holes = min(holes, tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) if ((tp->sacked_out + holes) > tp->packets_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) tp->sacked_out = tp->packets_out - holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
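
/* A worked example with assumed counters: packets_out == 10, lost_out == 2
 * and sacked_out == 9 give holes = min(max(2, 1), 10) = 2; since 9 + 2 > 10
 * the function clamps sacked_out to 10 - 2 = 8 and returns true, whereas
 * sacked_out == 7 would need no adjustment and return false.
 */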
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /* If we receive more dupacks than we expected when counting segments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * under the assumption of no reordering, interpret this as reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) * The only other possible reason is a bug in the receiver's TCP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) static void tcp_check_reno_reordering(struct sock *sk, const int addend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) if (!tcp_limit_reno_sacked(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) tp->reordering = min_t(u32, tp->packets_out + addend,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) tp->reord_seen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) /* Emulate SACKs for SACKless connection: account for a new dupack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) if (num_dupack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) u32 prior_sacked = tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) s32 delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) tp->sacked_out += num_dupack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) tcp_check_reno_reordering(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) delivered = tp->sacked_out - prior_sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) if (delivered > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) tcp_count_delivered(tp, delivered, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) /* Account for an ACK that ACKs some data during the Reno Recovery phase. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (acked > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) /* One ACK acked hole. The rest eat duplicate ACKs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) if (acked - 1 >= tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) tp->sacked_out -= acked - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) tcp_check_reno_reordering(sk, acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) }
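
/* A worked example with assumed values: a cumulative ACK that newly acks
 * acked == 3 segments while sacked_out == 5 emulated SACKs are pending
 * counts max(3 - 5, 1) = 1 delivered segment (one ACK filled the hole, the
 * rest eat duplicate ACKs) and drops sacked_out by acked - 1 == 2 to 3.
 * With sacked_out == 1 instead, delivered would be max(3 - 1, 1) = 2 and
 * sacked_out would be reset to 0.
 */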
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) void tcp_clear_retrans(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) tp->retrans_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) tp->undo_retrans = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) static inline void tcp_init_undo(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) tp->undo_marker = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) /* Retransmission still in flight may cause DSACKs later. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) tp->undo_retrans = tp->retrans_out ? : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) }
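
/* Note on the "?:" above: the GNU omitted-middle-operand form makes
 * "tp->retrans_out ? : -1" equivalent to
 * "tp->retrans_out ? tp->retrans_out : -1", i.e. keep a non-zero
 * retrans_out, otherwise fall back to the -1 sentinel.
 */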
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) static bool tcp_is_rack(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) /* If we detect SACK reneging, forget all SACK information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * and reset the tags completely; otherwise preserve the SACKs. If the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) * receiver dropped its ofo queue, we will know this due to reneging detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) static void tcp_timeout_mark_lost(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) struct sk_buff *skb, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) bool is_reneg; /* is receiver reneging on SACKs? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) head = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) if (is_reneg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) /* Mark SACK reneging until we recover from this loss event. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) tp->is_sack_reneg = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) } else if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) skb = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) if (is_reneg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) else if (tcp_is_rack(sk) && skb != head &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) tcp_rack_skb_timeout(tp, skb, 0) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) continue; /* Don't mark recently sent ones lost yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) tcp_clear_all_retrans_hints(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) /* Enter Loss state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) void tcp_enter_loss(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) tcp_timeout_mark_lost(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) /* Reduce ssthresh if it has not yet been made inside this window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) !after(tp->high_seq, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) tp->prior_cwnd = tp->snd_cwnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) tcp_ca_event(sk, CA_EVENT_LOSS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) tcp_init_undo(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) /* A timeout in the Disorder state after receiving substantial DUPACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) * suggests that the degree of reordering is over-estimated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) tp->reordering = min_t(unsigned int, tp->reordering,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) net->ipv4.sysctl_tcp_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) tcp_set_ca_state(sk, TCP_CA_Loss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) tcp_ecn_queue_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) * loss recovery is underway except recurring timeout(s) on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) tp->frto = net->ipv4.sysctl_tcp_frto &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) (new_recovery || icsk->icsk_retransmits) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) !inet_csk(sk)->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) /* If an ACK arrived pointing to a remembered SACK, it means that our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) * remembered SACKs do not reflect the real state of the receiver, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) * the receiver _host_ is heavily congested (or buggy).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) * To avoid big spurious retransmission bursts due to transient SACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * scoreboard oddities that look like reneging, we give the receiver a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) * restore sanity to the SACK scoreboard. If the apparent reneging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) * persists until this RTO then we'll clear the SACK scoreboard.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) static bool tcp_check_sack_reneging(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) if (flag & FLAG_SACK_RENEGING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) msecs_to_jiffies(10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) delay, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
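
/* The delay arithmetic above, with an assumed RTT: tp->srtt_us stores eight
 * times the smoothed RTT in microseconds, so "srtt_us >> 4" is RTT/2.
 * For a 40 ms RTT, srtt_us == 320000 and the timer is armed for 20 ms; for
 * a 10 ms RTT the RTT/2 term is only 5 ms, so the msecs_to_jiffies(10)
 * floor wins.
 */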
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) /* Heuristics to calculate the number of duplicate ACKs. There is no dupACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) * counter when SACK is enabled (without SACK, sacked_out is used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * that purpose).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) * With reordering, holes may still be in flight, so RFC3517 recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) * uses pure sacked_out (the total number of SACKed segments) even though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * it violates the RFC, which counts duplicate ACKs. The two are often
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * equal, but when e.g. out-of-window ACKs or packet duplication occur,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) * they differ. Since neither occurs due to loss, TCP should really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) * ignore them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) return tp->sacked_out + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) /* Linux NewReno/SACK/ECN state machine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * --------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) * "Open" Normal state, no dubious events, fast path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) * "Disorder" In all respects it is "Open",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) * but requires a bit more attention. It is entered when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) * we see some SACKs or dupacks. It is split off from "Open"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * mainly to move some processing from the fast path to the slow one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * "CWR" CWND was reduced due to some Congestion Notification event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * It can be ECN, ICMP source quench, local device congestion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * "Recovery" CWND was reduced, we are fast-retransmitting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * "Loss" CWND was reduced due to RTO timeout or SACK reneging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * tcp_fastretrans_alert() is entered:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) * - each incoming ACK, if state is not "Open"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * - when arrived ACK is unusual, namely:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * * SACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * * Duplicate ACK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * * ECN ECE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) * Counting packets in flight is pretty simple.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) * in_flight = packets_out - left_out + retrans_out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) * packets_out is SND.NXT-SND.UNA counted in packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) * retrans_out is number of retransmitted segments.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) * left_out is the number of segments that left the network but are not ACKed yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) * left_out = sacked_out + lost_out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) * sacked_out: Packets which arrived at the receiver out of order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) * and hence were not cumulatively ACKed. With SACKs this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) * number is simply the amount of SACKed data. Even without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) * SACKs it is easy to give a pretty reliable estimate of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) * this number by counting duplicate ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) * lost_out: Packets lost by the network. TCP has no explicit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) * "loss notification" feedback from the network (for now).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) * It means that this number can only be _guessed_.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * Actually, it is the heuristic used to predict loss that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * distinguishes the different algorithms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * E.g. after an RTO, when the whole queue is considered lost,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * lost_out = packets_out and in_flight = retrans_out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * Essentially, we now have a few algorithms for detecting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) * lost packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) * If the receiver supports SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) * RFC6675/3517: It is the conventional algorithm. A packet is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) * considered lost if the number of higher sequence packets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) * SACKed is greater than or equal to the DUPACK threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) * (reordering). This is implemented in tcp_mark_head_lost and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) * tcp_update_scoreboard.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) * (2017-) that checks timing instead of counting DUPACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) * Essentially a packet is considered lost if it's not S/ACKed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) * after RTT + reordering_window, where both metrics are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) * dynamically measured and adjusted. This is implemented in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) * tcp_rack_mark_lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) * If the receiver does not support SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) * NewReno (RFC6582): in Recovery we assume that one segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) * is lost (classic Reno). While we are in Recovery and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) * a partial ACK arrives, we assume that one more packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) * is lost (NewReno). These heuristics are the same in NewReno
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) * and SACK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) * The really tricky part of the algorithm (and the one requiring careful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) * tuning) is hidden in tcp_time_to_recover() and tcp_xmit_retransmit_queue().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) * The first determines the moment _when_ we should reduce CWND and,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) * hence, slow down forward transmission. In fact, it determines the moment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) * when we decide that a hole is caused by loss rather than by reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) * tcp_xmit_retransmit_queue() decides _what_ we should retransmit to fill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) * the holes caused by lost packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) * The most logically complicated part of the algorithm is the undo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) * heuristics. We detect false retransmits due to both too-early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) * fast retransmit (reordering) and an underestimated RTO, by analyzing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) * timestamps and D-SACKs. When we detect that some segments were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) * retransmitted by mistake and the CWND reduction was wrong, we undo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) * the window reduction and abort the recovery phase. This logic is hidden
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) * inside several functions named tcp_try_undo_<something>.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) */
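
/* A worked example of the RFC6675/3517 rule above, assuming the default
 * DUPACK threshold of 3 (tp->reordering == 3): segments P1..P5 are in
 * flight and SACK blocks arrive covering P2, P3 and P4. P1 now has three
 * higher-sequence segments SACKed above it, so it is marked lost and
 * becomes eligible for fast retransmit, while P5 (with no SACKed
 * segments above it) is merely presumed reordered for the time being.
 */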
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) /* This function decides when we should leave the Disorder state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * and enter the Recovery phase, reducing the congestion window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) * Main question: may we further continue forward transmission
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) * with the same cwnd?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) static bool tcp_time_to_recover(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) /* Trick#1: The loss is proven. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) if (tp->lost_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) /* Not-A-Trick#2: Classic rule... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) /* Detect loss in event "A" above by marking the head of the queue as lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * For RFC3517 SACK, a segment is considered lost if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * has at least tp->reordering SACKed segments above it; "packets" refers to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) * the maximum number of SACKed segments to pass before reaching this limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) int cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) /* Use SACK to deduce losses of new sequences sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) const u32 loss_high = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) WARN_ON(packets > tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) skb = tp->lost_skb_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) /* Head already handled? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) cnt = tp->lost_cnt_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) skb = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) /* TODO: do this better */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) /* this is not the most efficient way to do this... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) tp->lost_skb_hint = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) tp->lost_cnt_hint = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) cnt += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) if (cnt > packets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) if (mark_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) /* Account newly detected lost packet(s) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) int sacked_upto = tp->sacked_out - tp->reordering;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) if (sacked_upto >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) tcp_mark_head_lost(sk, sacked_upto, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) else if (fast_rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) tcp_mark_head_lost(sk, 1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) before(tp->rx_opt.rcv_tsecr, when);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) /* skb was spuriously retransmitted if the returned timestamp echo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) * reply is prior to the skb transmission time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) /* Nothing was retransmitted or the returned timestamp is less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) * than the timestamp of the first retransmission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) return tp->retrans_stamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) }
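
/* A short example of how the timestamp check above enables undo: the
 * original segment goes out carrying TSval 1000 and is later
 * retransmitted at TSval 1100 (recorded in tp->retrans_stamp); the ACK
 * that finally arrives echoes TSecr 1000. Since that echo is before
 * retrans_stamp, the ACK can only have been triggered by the original,
 * merely delayed transmission, so the retransmission (and any cwnd
 * reduction done for it) was spurious.
 */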
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) /* Undo procedures. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) /* We can clear retrans_stamp when there are no retransmissions in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) * window. It would seem that it is trivially available for us in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) * tp->retrans_out; however, that kind of assumption doesn't consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) * what will happen if errors occur when sending a retransmission for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) * second time. ...It could be that such a segment has only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) * TCPCB_EVER_RETRANS set at the present time. It seems that checking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) * the head skb is enough except for some reneging corner cases that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) * are not worth the effort.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) * The main reason for all this complexity is that the connection dying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * time now depends on the validity of retrans_stamp; in particular,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) * successive retransmissions of a segment must not advance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) * retrans_stamp under any conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) static bool tcp_any_retrans_done(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) if (tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) skb = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) static void DBGUNDO(struct sock *sk, const char *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) #if FASTRETRANS_DEBUG > 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) struct inet_sock *inet = inet_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) if (sk->sk_family == AF_INET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) &inet->inet_daddr, ntohs(inet->inet_dport),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) tp->snd_cwnd, tcp_left_out(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) tp->snd_ssthresh, tp->prior_ssthresh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) else if (sk->sk_family == AF_INET6) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) &sk->sk_v6_daddr, ntohs(inet->inet_dport),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) tp->snd_cwnd, tcp_left_out(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) tp->snd_ssthresh, tp->prior_ssthresh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) if (unmark_loss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) tcp_clear_all_retrans_hints(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) if (tp->prior_ssthresh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) if (tp->prior_ssthresh > tp->snd_ssthresh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) tp->snd_ssthresh = tp->prior_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) tcp_ecn_withdraw_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) tp->rack.advanced = 1; /* Force RACK to re-exam losses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) static inline bool tcp_may_undo(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) /* People celebrate: "We love our President!" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) static bool tcp_try_undo_recovery(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) if (tcp_may_undo(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) /* Happy end! We did not retransmit anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) * or our original transmission succeeded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) tcp_undo_cwnd_reduction(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) mib_idx = LINUX_MIB_TCPLOSSUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) mib_idx = LINUX_MIB_TCPFULLUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) } else if (tp->rack.reo_wnd_persist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) tp->rack.reo_wnd_persist--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) /* Hold the old state until something *above* high_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * is ACKed. For Reno this is a MUST to prevent false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * fast retransmits (RFC2582). SACK TCP is safe. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) tp->is_sack_reneg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) static bool tcp_try_undo_dsack(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) if (tp->undo_marker && !tp->undo_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) tp->rack.reo_wnd_persist + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) DBGUNDO(sk, "D-SACK");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) tcp_undo_cwnd_reduction(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) /* Undo during loss recovery after partial ACK or using F-RTO. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) if (frto_undo || tcp_may_undo(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) tcp_undo_cwnd_reduction(sk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) DBGUNDO(sk, "partial loss");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) if (frto_undo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) LINUX_MIB_TCPSPURIOUSRTOS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) inet_csk(sk)->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) if (frto_undo || tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) tp->is_sack_reneg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) /* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) * It computes the number of packets to send (sndcnt) based on packets newly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) * delivered:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) * 1) If the number of packets in flight is larger than ssthresh, PRR spreads the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) * cwnd reductions across a full RTT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * 2) Otherwise PRR uses packet conservation to send as much as delivered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * But when the retransmits are acked without further losses, PRR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * slow starts cwnd up to ssthresh to speed up the recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) static void tcp_init_cwnd_reduction(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) tp->prior_cwnd = tp->snd_cwnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) tp->prr_delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) tp->prr_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) tcp_ecn_queue_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) int sndcnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) tp->prr_delivered += newly_acked_sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) if (delta < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) tp->prior_cwnd - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) FLAG_RETRANS_DATA_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) sndcnt = min_t(int, delta,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) max_t(int, tp->prr_delivered - tp->prr_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) newly_acked_sacked) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) sndcnt = min(delta, newly_acked_sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) /* Force a fast retransmit upon entering fast recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) }
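
/* A worked example of the proportional branch in tcp_cwnd_reduction(),
 * assuming prior_cwnd = 10 and snd_ssthresh = 5: on an ACK where more
 * than ssthresh packets are still in flight, prr_delivered has reached 4
 * and prr_out is still 1,
 *
 *	sndcnt = DIV_ROUND_UP(5 * 4, 10) - 1 = 2 - 1 = 1
 *
 * so roughly one packet is sent for every two newly delivered, and cwnd
 * glides from 10 down towards 5 over about one RTT instead of being cut
 * in a single step.
 */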
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) static inline void tcp_end_cwnd_reduction(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) if (inet_csk(sk)->icsk_ca_ops->cong_control)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) tp->snd_cwnd = tp->snd_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) void tcp_enter_cwr(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) tcp_set_ca_state(sk, TCP_CA_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) EXPORT_SYMBOL(tcp_enter_cwr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) static void tcp_try_keep_open(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) int state = TCP_CA_Open;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) state = TCP_CA_Disorder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) if (inet_csk(sk)->icsk_ca_state != state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) tcp_set_ca_state(sk, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) static void tcp_try_to_open(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) if (flag & FLAG_ECE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) tcp_enter_cwr(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) static void tcp_mtup_probe_failed(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) icsk->icsk_mtup.probe_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) static void tcp_mtup_probe_success(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) /* FIXME: breaks with very large cwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) tp->snd_cwnd = tp->snd_cwnd *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) tcp_mss_to_mtu(sk, tp->mss_cache) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) icsk->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) tp->snd_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) icsk->icsk_mtup.probe_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) }
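
/* A worked example of the cwnd rescaling in tcp_mtup_probe_success():
 * if the connection was running with snd_cwnd = 20 at an MTU of 1500
 * and a probe of size 3000 succeeds, the new cwnd is
 * 20 * 1500 / 3000 = 10 segments, so the amount of data in flight in
 * bytes stays roughly unchanged even though each segment is now about
 * twice as large.
 */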
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) /* Do a simple retransmit without using the backoff mechanisms in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) * tcp_timer. This is used for path mtu discovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * The socket is already locked here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) void tcp_simple_retransmit(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) unsigned int mss = tcp_current_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) if (tcp_skb_seglen(skb) > mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) tcp_clear_retrans_hints_partial(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) if (!tp->lost_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) tcp_limit_reno_sacked(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) /* Don't muck with the congestion window here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) * The reason is that we do not increase the amount of _data_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) * in the network, but the units have changed and the effective
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) * cwnd/ssthresh are really reduced now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) if (icsk->icsk_ca_state != TCP_CA_Loss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) tp->snd_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) tcp_set_ca_state(sk, TCP_CA_Loss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) tcp_xmit_retransmit_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) EXPORT_SYMBOL(tcp_simple_retransmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) void tcp_enter_recovery(struct sock *sk, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) mib_idx = LINUX_MIB_TCPRENORECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) mib_idx = LINUX_MIB_TCPSACKRECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) tcp_init_undo(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) if (!tcp_in_cwnd_reduction(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) if (!ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) tcp_set_ca_state(sk, TCP_CA_Recovery);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) /* Process an ACK in CA_Loss state. Move to CA_Open if the lost data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * recovered or the loss was spurious. Otherwise retransmit more on partial ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) int *rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) bool recovered = !before(tp->snd_una, tp->high_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) tcp_try_undo_loss(sk, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) /* Step 3.b. A timeout is spurious if not all data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) * lost, i.e., never-retransmitted data are (s)acked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) if ((flag & FLAG_ORIG_SACK_ACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) tcp_try_undo_loss(sk, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) if (after(tp->snd_nxt, tp->high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) if (flag & FLAG_DATA_SACKED || num_dupack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) tp->frto = 0; /* Step 3.a. loss was real */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) /* Step 2.b. Try to send new data (but deferred until cwnd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) * is updated in tcp_ack()). Otherwise fall back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) * the conventional recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) if (!tcp_write_queue_empty(sk) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) after(tcp_wnd_end(tp), tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) *rexmit = REXMIT_NEW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) tp->frto = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) if (recovered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) tcp_try_undo_recovery(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) /* A Reno DUPACK means new data in F-RTO step 2.b above are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) * delivered. Lower inflight to clock out (re)transmissions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) else if (flag & FLAG_SND_UNA_ADVANCED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) *rexmit = REXMIT_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) static bool tcp_force_fast_retransmit(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) return after(tcp_highest_sack_seq(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) tp->snd_una + tp->reordering * tp->mss_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) }
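
/* Illustration of the threshold in tcp_force_fast_retransmit(), assuming
 * tp->reordering == 3 and an MSS of 1000 bytes: with snd_una at byte
 * 10000, a SACK whose highest sequence lies beyond byte 13000, i.e. more
 * than three full-sized segments above snd_una, is taken as evidence
 * that the hole is a real loss rather than mere reordering, so fast
 * retransmit is forced.
 */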
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) /* Undo during fast recovery after partial ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) bool *do_lost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) if (tp->undo_marker && tcp_packet_delayed(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) /* Plain luck! The hole was filled with a delayed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) * packet, rather than with a retransmit. Check reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) tcp_check_sack_reordering(sk, prior_snd_una, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) /* We are getting evidence that the reordering degree is higher
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) * than we realized. If there are no retransmits out then we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) * can undo. Otherwise we clock out new packets but do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) * mark more packets lost or retransmit more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) if (tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) DBGUNDO(sk, "partial recovery");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) tcp_undo_cwnd_reduction(sk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) /* Partial ACK arrived. Force fast retransmit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) *do_lost = tcp_force_fast_retransmit(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) if (tcp_rtx_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) if (unlikely(tcp_is_reno(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) } else if (tcp_is_rack(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) u32 prior_retrans = tp->retrans_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) if (tcp_rack_mark_lost(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) *ack_flag &= ~FLAG_SET_XMIT_TIMER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) if (prior_retrans > tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) *ack_flag |= FLAG_LOST_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) /* Process an event which can update packets-in-flight non-trivially.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) * The main goal of this function is to calculate a new estimate for left_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) * taking into account both packets sitting in the receiver's buffer and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) * packets lost by the network.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) * Besides that it updates the congestion state when packet loss or ECN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) * is detected. But it does not reduce the cwnd; that is done by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) * congestion control later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) * It does _not_ decide what to send; that is done in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) * tcp_xmit_retransmit_queue().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) int num_dupack, int *ack_flag, int *rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) int fast_rexmit = 0, flag = *ack_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) bool ece_ack = flag & FLAG_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) tcp_force_fast_retransmit(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) if (!tp->packets_out && tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) /* Now the state machine starts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) * A. ECE, hence prohibit cwnd undoing; the reduction is required. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) if (ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) /* B. In all the states check for reneging SACKs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) if (tcp_check_sack_reneging(sk, flag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) /* C. Check consistency of the current state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) /* D. Check state exit conditions. State can be terminated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) * when high_seq is ACKed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) if (icsk->icsk_ca_state == TCP_CA_Open) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) WARN_ON(tp->retrans_out != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) } else if (!before(tp->snd_una, tp->high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) switch (icsk->icsk_ca_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) case TCP_CA_CWR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) /* CWR is to be held until something *above* high_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) * is ACKed, for the CWR bit to reach the receiver. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) if (tp->snd_una != tp->high_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) case TCP_CA_Recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (tcp_try_undo_recovery(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) /* E. Process state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) switch (icsk->icsk_ca_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) case TCP_CA_Recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) if (!(flag & FLAG_SND_UNA_ADVANCED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) tcp_add_reno_sack(sk, num_dupack, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) if (tcp_try_undo_dsack(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) if (icsk->icsk_ca_state != TCP_CA_Recovery) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) if (!tcp_time_to_recover(sk, flag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) /* An undo reverts the recovery state. If loss is evident,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) * start a new recovery (e.g. reordering then loss).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) tcp_enter_recovery(sk, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) case TCP_CA_Loss:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) tcp_process_loss(sk, flag, num_dupack, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) if (!(icsk->icsk_ca_state == TCP_CA_Open ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) (*ack_flag & FLAG_LOST_RETRANS)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) /* Change state if cwnd is undone or retransmits are lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) if (flag & FLAG_SND_UNA_ADVANCED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) tcp_add_reno_sack(sk, num_dupack, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) if (icsk->icsk_ca_state <= TCP_CA_Disorder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) tcp_try_undo_dsack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) if (!tcp_time_to_recover(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) tcp_try_to_open(sk, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) /* MTU probe failure: don't reduce cwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) if (icsk->icsk_ca_state < TCP_CA_CWR &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) icsk->icsk_mtup.probe_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) tp->snd_una == tp->mtu_probe.probe_seq_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) tcp_mtup_probe_failed(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) /* Restores the reduction we did in tcp_mtup_probe() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) tp->snd_cwnd++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) tcp_simple_retransmit(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) /* Otherwise enter Recovery state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) tcp_enter_recovery(sk, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) fast_rexmit = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) if (!tcp_is_rack(sk) && do_lost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) tcp_update_scoreboard(sk, fast_rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) *rexmit = REXMIT_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) /* If the remote keeps returning delayed ACKs, eventually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) * the min filter would pick it up and overestimate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) * prop. delay when it expires. Skip suspected delayed ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) rtt_us ? : jiffies_to_usecs(1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) long seq_rtt_us, long sack_rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) long ca_rtt_us, struct rate_sample *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) * broken middle-boxes or peers may corrupt TS-ECR fields. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) * Karn's algorithm forbids taking RTT if some retransmitted data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) * is acked (RFC6298).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) if (seq_rtt_us < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) seq_rtt_us = sack_rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) /* RTTM Rule: A TSecr value received in a segment is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) * update the averaged RTT measurement only if the segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) * acknowledges some new data, i.e., only if it advances the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) * left edge of the send window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) * See draft-ietf-tcplw-high-performance-00, section 3.3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) flag & FLAG_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) if (!delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) delta = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) ca_rtt_us = seq_rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) if (seq_rtt_us < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) * always taken together with ACK, SACK, or TS-opts. Any negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) * values will be skipped with the seq_rtt_us < 0 check above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) tcp_update_rtt_min(sk, ca_rtt_us, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) tcp_rtt_estimator(sk, seq_rtt_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) tcp_set_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) /* RFC6298: only reset backoff on valid RTT measurement. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) inet_csk(sk)->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) }
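
/* Worked example for the TSecr fallback above, using illustrative
 * numbers: with millisecond timestamp ticks (USEC_PER_SEC / TCP_TS_HZ
 * being 1000 us per tick), a delta of 12 ticks between tcp_time_stamp()
 * and the echoed rcv_tsecr gives seq_rtt_us = 12 * 1000 = 12000 us,
 * i.e. a 12 ms RTT sample, which then feeds tcp_rtt_estimator() and
 * tcp_set_rto().
 */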
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) struct rate_sample rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) long rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) /* Restart timer after forward progress on connection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) * RFC2988 recommends restarting the timer to now + rto.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) void tcp_rearm_rto(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) /* If the retrans timer is currently being used by Fast Open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) * for SYN-ACK retrans purpose, stay put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) if (rcu_access_pointer(tp->fastopen_rsk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) if (!tp->packets_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) u32 rto = inet_csk(sk)->icsk_rto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) /* Offset the time elapsed after installing regular RTO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) s64 delta_us = tcp_rto_delta_us(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) /* delta_us may not be positive if the socket is locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) * when the retrans timer fires and is rescheduled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) rto = usecs_to_jiffies(max_t(int, delta_us, 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) }
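
/* Illustration of the offset above, with example timings: when a
 * reordering or loss-probe timer is pending and the head of the rtx
 * queue was sent 40 ms ago with icsk_rto at 200 ms, tcp_rto_delta_us()
 * is roughly 160 ms, so the RTO is re-armed for the remaining 160 ms
 * rather than a fresh 200 ms, keeping the timeout anchored to the
 * original transmission time.
 */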
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) static void tcp_set_xmit_timer(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) if (!tcp_schedule_loss_probe(sk, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) /* If we get here, the whole TSO packet has not been acked. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) u32 packets_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) packets_acked = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) packets_acked -= tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) if (packets_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) BUG_ON(tcp_skb_pcount(skb) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) return packets_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) }
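
/* Illustrative example of the accounting above, with hypothetical sizes:
 * suppose a TSO skb covers seq 1000..9000 as eight MSS-sized segments
 * and the ACK moves snd_una to 4000.  tcp_trim_head() drops the first
 * 3000 acked bytes, tcp_skb_pcount() falls from 8 to 5, and the function
 * returns 3, the number of segments newly acknowledged by this ACK.
 */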
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) u32 prior_snd_una)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) const struct skb_shared_info *shinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) shinfo = skb_shinfo(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) if (!before(shinfo->tskey, prior_snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) tcp_skb_tsorted_save(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) } tcp_skb_tsorted_restore(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) /* Remove acknowledged frames from the retransmission queue. If our packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) * is before the ack sequence, we can discard it, as it's confirmed to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) * arrived at the other end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) struct tcp_sacktag_state *sack, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) u64 first_ackt, last_ackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) u32 prior_sacked = tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) struct sk_buff *skb, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) bool fully_acked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) long sack_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) long seq_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) long ca_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) u32 pkts_acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) u32 last_in_flight = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) bool rtt_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) int flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) first_ackt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) const u32 start_seq = scb->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) u8 sacked = scb->sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) u32 acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) /* Determine how many packets and bytes were acked (TSO or otherwise) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) if (after(scb->end_seq, tp->snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) if (tcp_skb_pcount(skb) == 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) !after(tp->snd_una, scb->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) acked_pcount = tcp_tso_acked(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) if (!acked_pcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) fully_acked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) acked_pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) if (unlikely(sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) if (sacked & TCPCB_SACKED_RETRANS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) tp->retrans_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) flag |= FLAG_RETRANS_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) } else if (!(sacked & TCPCB_SACKED_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) last_ackt = tcp_skb_timestamp_us(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) WARN_ON_ONCE(last_ackt == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) if (!first_ackt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) first_ackt = last_ackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) if (before(start_seq, reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) if (!after(scb->end_seq, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) flag |= FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) if (sacked & TCPCB_SACKED_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) tp->sacked_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) } else if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) tcp_count_delivered(tp, acked_pcount, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) if (!tcp_skb_spurious_retrans(tp, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) tcp_rack_advance(tp, sacked, scb->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) if (sacked & TCPCB_LOST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) tp->lost_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) tp->packets_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) pkts_acked += acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) tcp_rate_skb_delivered(sk, skb, sack->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) /* Initial outgoing SYNs get put onto the write_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) * just like anything else we transmit. They are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) * true data, and if we misinform our callers that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) * this ACK acks real data, we will erroneously exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) * connection startup slow start one packet too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) * quickly. This is severely frowned upon behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) flag |= FLAG_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) flag |= FLAG_SYN_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) if (!fully_acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) tcp_ack_tstamp(sk, skb, prior_snd_una);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) next = skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) if (unlikely(skb == tp->retransmit_skb_hint))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) tp->retransmit_skb_hint = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) if (unlikely(skb == tp->lost_skb_hint))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) tp->lost_skb_hint = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) tcp_highest_sack_replace(sk, skb, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) tcp_rtx_queue_unlink_and_free(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) tp->snd_up = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) tcp_ack_tstamp(sk, skb, prior_snd_una);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) flag |= FLAG_SACK_RENEGING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) last_in_flight && !prior_sacked && fully_acked &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) sack->rate->prior_delivered + 1 == tp->delivered &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) /* Conservatively mark a delayed ACK. It's typically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) * from a lone runt packet over the round trip to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) * a receiver without out-of-order or CE events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) flag |= FLAG_ACK_MAYBE_DELAYED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) if (sack->first_sackt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) ca_rtt_us, sack->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) if (flag & FLAG_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) if (unlikely(icsk->icsk_mtup.probe_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) tcp_mtup_probe_success(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) /* If any of the cumulatively ACKed segments was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) * retransmitted, the non-SACK case cannot confirm that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) * progress was due to the original transmission, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) * of the lack of TCPCB_SACKED_ACKED bits, even though some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) * of the packets may never have been retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) if (flag & FLAG_RETRANS_DATA_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) flag &= ~FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) int delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) /* Non-retransmitted hole got filled? That's reordering */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) if (before(reord, prior_fack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) tcp_check_sack_reordering(sk, reord, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) delta = prior_sacked - tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) } else if (skb && rtt_update && sack_rtt_us >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) tcp_skb_timestamp_us(skb))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) /* Do not re-arm RTO if the sack RTT is measured from data sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) * after the head was last (re)transmitted. Otherwise the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) * timeout may continue to extend in loss recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) if (icsk->icsk_ca_ops->pkts_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) struct ack_sample sample = { .pkts_acked = pkts_acked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) .rtt_us = sack->rate->rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) .in_flight = last_in_flight };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) icsk->icsk_ca_ops->pkts_acked(sk, &sample);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) #if FASTRETRANS_DEBUG > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) WARN_ON((int)tp->sacked_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) WARN_ON((int)tp->lost_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) WARN_ON((int)tp->retrans_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) if (!tp->packets_out && tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) if (tp->lost_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) pr_debug("Leak l=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) tp->lost_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) if (tp->sacked_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) pr_debug("Leak s=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) tp->sacked_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) if (tp->retrans_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) pr_debug("Leak r=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) tp->retrans_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) tp->retrans_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) return flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) static void tcp_ack_probe(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) struct sk_buff *head = tcp_send_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) /* Has a usable window opened? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) icsk->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) icsk->icsk_probes_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) /* The socket must be woken up by a subsequent tcp_data_snd_check().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) * This function is not for general use!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) when = tcp_clamp_probe0_to_user_timeout(sk, when);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) /* Decide whether to run the increase function of congestion control. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) /* If reordering is high then always grow cwnd whenever data is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) * delivered regardless of its ordering. Otherwise stay conservative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) * new SACK or ECE mark may first advance cwnd here and later reduce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) * cwnd in tcp_fastretrans_alert() based on more states.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) return flag & FLAG_FORWARD_PROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) return flag & FLAG_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) /* The "ultimate" congestion control function that aims to replace the rigid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) * cwnd increase and decrease control (tcp_cong_avoid, tcp_*cwnd_reduction).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) * It's called toward the end of processing an ACK with precise rate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) * information. All transmissions and retransmissions are deferred until afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) int flag, const struct rate_sample *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) if (icsk->icsk_ca_ops->cong_control) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) icsk->icsk_ca_ops->cong_control(sk, rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) if (tcp_in_cwnd_reduction(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) /* Reduce cwnd if state mandates */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) tcp_cwnd_reduction(sk, acked_sacked, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) } else if (tcp_may_raise_cwnd(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) /* Advance cwnd if state allows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) tcp_cong_avoid(sk, ack, acked_sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) tcp_update_pacing_rate(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) /* Check that window update is acceptable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) * The function assumes that snd_una<=ack<=snd_next.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) static inline bool tcp_may_update_window(const struct tcp_sock *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) const u32 ack, const u32 ack_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) const u32 nwin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) return after(ack, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) after(ack_seq, tp->snd_wl1) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) }
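
/* Example of the acceptance test above, with illustrative sequence
 * numbers: given snd_una = 100, snd_wl1 = 50 and snd_wnd = 1000, an ACK
 * carrying ack = 100, ack_seq = 50 and nwin = 2000 is accepted via the
 * third clause (same ack_seq, larger window), so a pure window update
 * that acks no new data can still widen the send window.
 */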
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) /* If we update tp->snd_una, also update tp->bytes_acked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) u32 delta = ack - tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) sock_owned_by_me((struct sock *)tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) tp->bytes_acked += delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) tp->snd_una = ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) /* If we update tp->rcv_nxt, also update tp->bytes_received */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) u32 delta = seq - tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) sock_owned_by_me((struct sock *)tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) tp->bytes_received += delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) WRITE_ONCE(tp->rcv_nxt, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) /* Update our send window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) * The window update algorithm described in RFC793/RFC1122 (used in linux-2.2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) * and in FreeBSD; NetBSD's is even worse) is wrong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) u32 ack_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) int flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) u32 nwin = ntohs(tcp_hdr(skb)->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) if (likely(!tcp_hdr(skb)->syn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) nwin <<= tp->rx_opt.snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) flag |= FLAG_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) tcp_update_wl(tp, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) if (tp->snd_wnd != nwin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) tp->snd_wnd = nwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) /* Note: this is the only place where the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) * fast path is re-enabled for the sending side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) tcp_fast_path_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) if (!tcp_write_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) tcp_slow_start_after_idle_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) if (nwin > tp->max_window) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) tp->max_window = nwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) tcp_snd_una_update(tp, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) return flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) }
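
/* Window-scaling example for the code above, with illustrative numbers:
 * a raw 16-bit window field of 1000 combined with a negotiated
 * snd_wscale of 7 yields an effective send window of 1000 << 7 = 128000
 * bytes; the shift is skipped for SYN segments because the window field
 * of a SYN is never scaled.
 */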
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) u32 *last_oow_ack_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) if (*last_oow_ack_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) NET_INC_STATS(net, mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) return true; /* rate-limited: don't send yet! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) *last_oow_ack_time = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) return false; /* not rate-limited: go ahead, send dupack now! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) /* Return true if we're currently rate-limiting out-of-window ACKs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) * thus shouldn't send a dupack right now. We rate-limit dupacks in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) * attacks that send repeated SYNs or ACKs for the same connection. To
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) * do this, we do not send a duplicate SYNACK or ACK if the remote
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) int mib_idx, u32 *last_oow_ack_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) /* Data packets without SYNs are not likely part of an ACK loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) !tcp_hdr(skb)->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) /* RFC 5961 7 [ACK Throttling] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) /* unprotected vars, we don't care about overwrites */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) static u32 challenge_timestamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) static unsigned int challenge_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) u32 count, now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) /* First check our per-socket dupack rate limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) if (__tcp_oow_rate_limited(net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) /* Then check host-wide RFC 5961 rate limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) now = jiffies / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) if (now != challenge_timestamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) u32 half = (ack_limit + 1) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) challenge_timestamp = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) count = READ_ONCE(challenge_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) WRITE_ONCE(challenge_count, count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) }
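
/* Example of the host-wide budget above, with the sysctl value chosen
 * purely for illustration: if tcp_challenge_ack_limit is 1000, each new
 * second re-arms challenge_count to 500 + prandom_u32_max(1000), i.e. a
 * random budget in [500, 1499]; every challenge ACK sent decrements the
 * budget, and once it reaches zero further challenges are silently
 * dropped until the next second.
 */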
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) static void tcp_store_ts_recent(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) /* PAWS bug workaround wrt. ACK frames: the extra PAWS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) * discard check below makes sure this can only happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) * for pure ACK frames. -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) * Not only that: it also occurs for expired timestamps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) if (tcp_paws_check(&tp->rx_opt, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) /* This routine deals with acks during a TLP episode and ends an episode by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) if (before(ack, tp->tlp_high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) if (!tp->tlp_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) /* TLP of new data has been acknowledged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) } else if (flag & FLAG_DSACKING_ACK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) /* This DSACK means original and TLP probe arrived; no loss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) } else if (after(ack, tp->tlp_high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) /* ACK advances: there was a loss, so reduce cwnd. Reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) * tlp_high_seq in tcp_init_cwnd_reduction()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) tcp_set_ca_state(sk, TCP_CA_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) LINUX_MIB_TCPLOSSPROBERECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) /* Pure dupack: original and TLP probe arrived; no loss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) }
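
/* Outcome summary for the branches above: if the probe carried new data
 * that is now acked, or a DSACK or a pure dupack shows that both the
 * original segment and the probe arrived, the TLP episode simply ends;
 * only an ACK advancing beyond tlp_high_seq without such evidence is
 * treated as a real loss and triggers the CWR-style cwnd reduction.
 */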
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) if (icsk->icsk_ca_ops->in_ack_event)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) icsk->icsk_ca_ops->in_ack_event(sk, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) /* Congestion control has already updated the cwnd. So if we're in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) * loss recovery, we now do any new sends (for FRTO) or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) * retransmits (for CA_Loss or CA_Recovery) that make sense.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) static void tcp_xmit_recovery(struct sock *sk, int rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (unlikely(rexmit == REXMIT_NEW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) __tcp_push_pending_frames(sk, tcp_current_mss(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) TCP_NAGLE_OFF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) if (after(tp->snd_nxt, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) tp->frto = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) tcp_xmit_retransmit_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) /* Returns the number of packets newly acked or sacked by the current ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) const struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) u32 delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) delivered = tp->delivered - prior_delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) if (flag & FLAG_ECE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) return delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) /* This routine deals with incoming acks, but not outgoing ones. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) struct tcp_sacktag_state sack_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) struct rate_sample rs = { .prior_delivered = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) u32 prior_snd_una = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) bool is_sack_reneg = tp->is_sack_reneg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) u32 ack_seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) u32 ack = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) int num_dupack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) int prior_packets = tp->packets_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) u32 delivered = tp->delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) u32 lost = tp->lost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) u32 prior_fack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) sack_state.first_sackt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) sack_state.rate = &rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) sack_state.sack_delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) /* We will very likely need to access the rtx queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) prefetch(sk->tcp_rtx_queue.rb_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) /* If the ack is older than previous acks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) * then we can probably ignore it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) if (before(ack, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) if (before(ack, prior_snd_una - tp->max_window)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) if (!(flag & FLAG_NO_CHALLENGE_ACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) goto old_ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) /* If the ack includes data we haven't sent yet, discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) * this segment (RFC793 Section 3.9).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) if (after(ack, tp->snd_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) if (after(ack, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) flag |= FLAG_SND_UNA_ADVANCED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) icsk->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) #if IS_ENABLED(CONFIG_TLS_DEVICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) if (static_branch_unlikely(&clean_acked_data_enabled.key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) if (icsk->icsk_clean_acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) icsk->icsk_clean_acked(sk, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) rs.prior_in_flight = tcp_packets_in_flight(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) /* ts_recent update must be made after we are sure that the packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) * is in window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) if (flag & FLAG_UPDATE_TS_RECENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) FLAG_SND_UNA_ADVANCED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) /* Window is constant, pure forward advance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) * No more checks are required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) * Note that we use the fact that SND.UNA >= SND.WL2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) tcp_update_wl(tp, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) tcp_snd_una_update(tp, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) flag |= FLAG_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) u32 ack_ev_flags = CA_ACK_SLOWPATH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) if (ack_seq != TCP_SKB_CB(skb)->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) flag |= FLAG_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) if (TCP_SKB_CB(skb)->sacked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) &sack_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) flag |= FLAG_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) ack_ev_flags |= CA_ACK_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) if (sack_state.sack_delivered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) tcp_count_delivered(tp, sack_state.sack_delivered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) if (flag & FLAG_WIN_UPDATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) ack_ev_flags |= CA_ACK_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) tcp_in_ack_event(sk, ack_ev_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) /* This is a deviation from RFC3168 since it states that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) * "When the TCP data sender is ready to set the CWR bit after reducing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) * the congestion window, it SHOULD set the CWR bit only on the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) * new data packet that it transmits."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) * We accept CWR on pure ACKs to be more robust
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) * with widely-deployed TCP implementations that do this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) tcp_ecn_accept_cwr(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) /* We passed data and got it acked, remove any soft error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) * log. Something worked...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) sk->sk_err_soft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) icsk->icsk_probes_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) tp->rcv_tstamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) if (!prior_packets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) goto no_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) /* See if we can take anything off of the retransmit queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) tcp_rack_update_reo_wnd(sk, &rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) if (tp->tlp_high_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) tcp_process_tlp_ack(sk, ack, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) if (tcp_ack_is_dubious(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) num_dupack = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) /* Consider if pure acks were aggregated in tcp_add_backlog() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) if (!(flag & FLAG_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) /* If needed, reset the TLP/RTO timer when RACK doesn't set it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) if (flag & FLAG_SET_XMIT_TIMER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) tcp_set_xmit_timer(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) sk_dst_confirm(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) delivered = tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) lost = tp->lost - lost; /* freshly marked lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) tcp_xmit_recovery(sk, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) no_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) /* If data was DSACKed, see if we can undo a cwnd reduction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) if (flag & FLAG_DSACKING_ACK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) /* If this ack opens up a zero window, clear backoff. It was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) * being used to time the probes, and is probably far higher than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) * it needs to be for normal retransmission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) tcp_ack_probe(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) if (tp->tlp_high_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) tcp_process_tlp_ack(sk, ack, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) old_ack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) /* If data was SACKed, tag it and see if we should send more data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) * If data was DSACKed, see if we can undo a cwnd reduction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) if (TCP_SKB_CB(skb)->sacked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) &sack_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) tcp_xmit_recovery(sk, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) bool syn, struct tcp_fastopen_cookie *foc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) bool exp_opt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) /* Valid only in SYN or SYN-ACK with an even length. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) if (!foc || !syn || len < 0 || (len & 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) if (len >= TCP_FASTOPEN_COOKIE_MIN &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) len <= TCP_FASTOPEN_COOKIE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) memcpy(foc->val, cookie, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) else if (len != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) len = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) foc->len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) foc->exp = exp_opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) }
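/* A minimal sketch of what this helper sees, assuming the usual RFC 7413
 * encoding (kind 34, one length byte, then the cookie) and the kernel's
 * customary 4..16 byte cookie bounds; the byte values are illustrative:
 *
 *   total option len 2  -> len argument 0:  cookie request, foc->len = 0
 *   total option len 10 -> 8-byte cookie copied,            foc->len = 8
 *   total option len 9  -> odd cookie length, rejected,     foc untouched
 *   total option len 20 -> 18-byte cookie exceeds the max,  foc->len = -1
 */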
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) static bool smc_parse_options(const struct tcphdr *th,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) struct tcp_options_received *opt_rx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) const unsigned char *ptr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) int opsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) if (static_branch_unlikely(&tcp_have_smc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) if (th->syn && !(opsize & 1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) opsize >= TCPOLEN_EXP_SMC_BASE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) opt_rx->smc_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) /* Try to parse the MSS option from the TCP header. Returns 0 on failure, or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) * (possibly user_mss-clamped) MSS value on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) const unsigned char *ptr = (const unsigned char *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) int length = (th->doff * 4) - sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) u16 mss = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) while (length > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) if (length < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) if (opsize < 2) /* "silly options" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) if (opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) return mss; /* fail on partial options */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) u16 in_mss = get_unaligned_be16(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) if (in_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) if (user_mss && user_mss < in_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) in_mss = user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) mss = in_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) ptr += opsize - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) }
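/* Illustrative only (the option bytes are an example, not taken from this
 * file): the classic MSS option on a SYN is kind 2, length 4, value 1460,
 * i.e. the bytes 02 04 05 b4.  With no user clamp that returns 1460; with
 * a TCP_MAXSEG-style user_mss of 1200 it returns 1200; and a header that
 * carries no MSS option at all falls through the loop and returns 0.
 */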
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) /* Look for TCP options. Normally only called on SYN and SYNACK packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) * But this can also be called on packets in the established flow when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) * the fast version below fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) void tcp_parse_options(const struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) struct tcp_options_received *opt_rx, int estab,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) struct tcp_fastopen_cookie *foc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) const unsigned char *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) int length = (th->doff * 4) - sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) ptr = (const unsigned char *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) opt_rx->saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) opt_rx->saw_unknown = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) while (length > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) if (length < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) if (opsize < 2) /* "silly options" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) if (opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) return; /* don't parse partial options */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) case TCPOPT_MSS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) if (opsize == TCPOLEN_MSS && th->syn && !estab) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) u16 in_mss = get_unaligned_be16(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) if (in_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) if (opt_rx->user_mss &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) opt_rx->user_mss < in_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) in_mss = opt_rx->user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) opt_rx->mss_clamp = in_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) case TCPOPT_WINDOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) if (opsize == TCPOLEN_WINDOW && th->syn &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) !estab && net->ipv4.sysctl_tcp_window_scaling) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) __u8 snd_wscale = *(__u8 *)ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) opt_rx->wscale_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) if (snd_wscale > TCP_MAX_WSCALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) snd_wscale,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) TCP_MAX_WSCALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) snd_wscale = TCP_MAX_WSCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) opt_rx->snd_wscale = snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) case TCPOPT_TIMESTAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) if ((opsize == TCPOLEN_TIMESTAMP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) ((estab && opt_rx->tstamp_ok) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) (!estab && net->ipv4.sysctl_tcp_timestamps))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) opt_rx->saw_tstamp = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) opt_rx->rcv_tsval = get_unaligned_be32(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) case TCPOPT_SACK_PERM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) if (opsize == TCPOLEN_SACK_PERM && th->syn &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) !estab && net->ipv4.sysctl_tcp_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) opt_rx->sack_ok = TCP_SACK_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) tcp_sack_reset(opt_rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) case TCPOPT_SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) opt_rx->sack_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) case TCPOPT_MD5SIG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) * The MD5 Hash has already been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) * checked (see tcp_v{4,6}_do_rcv()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) case TCPOPT_FASTOPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) tcp_parse_fastopen_option(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) opsize - TCPOLEN_FASTOPEN_BASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) ptr, th->syn, foc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) case TCPOPT_EXP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) /* The Fast Open option shares option kind 254, using a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) * 16-bit magic number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) get_unaligned_be16(ptr) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) TCPOPT_FASTOPEN_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) tcp_parse_fastopen_option(opsize -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) TCPOLEN_EXP_FASTOPEN_BASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) ptr + 2, th->syn, foc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) if (smc_parse_options(th, opt_rx, ptr, opsize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) opt_rx->saw_unknown = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) opt_rx->saw_unknown = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) ptr += opsize-2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) EXPORT_SYMBOL(tcp_parse_options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) const __be32 *ptr = (const __be32 *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) tp->rx_opt.saw_tstamp = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) ++ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) tp->rx_opt.rcv_tsval = ntohl(*ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) ++ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) if (*ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) tp->rx_opt.rcv_tsecr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) }
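/* For reference, the single 32-bit word compared above encodes the layout
 * that well-behaved stacks emit for a timestamp-only option block:
 * NOP (1), NOP (1), TIMESTAMP (8), length 10 -- i.e. the bytes
 * 01 01 08 0a, or htonl(0x0101080a).  The two following 32-bit words are
 * then TSval and TSecr.  (Worked out from the constants used above;
 * illustrative, not an additional check.)
 */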
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) /* Fast parse options. This hopes to only see timestamps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) * If it is wrong it falls back on tcp_parse_options().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) static bool tcp_fast_parse_options(const struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) const struct tcphdr *th, struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) /* In the spirit of fast parsing, compare doff directly to constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) * values. Because equality is used, short doff can be ignored here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) if (th->doff == (sizeof(*th) / 4)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) } else if (tp->rx_opt.tstamp_ok &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) if (tcp_parse_aligned_timestamp(tp, th))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) tp->rx_opt.rcv_tsecr -= tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) * Parse MD5 Signature option
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) int length = (th->doff << 2) - sizeof(*th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) const u8 *ptr = (const u8 *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) /* If not enough data remains, we can bail out early. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) while (length >= TCPOLEN_MD5SIG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) case TCPOPT_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) if (opsize < 2 || opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) if (opcode == TCPOPT_MD5SIG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) ptr += opsize - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) EXPORT_SYMBOL(tcp_parse_md5sig_option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) * it can pass through the stack. So, the following predicate verifies that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) * this segment is not used for anything but congestion avoidance or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) * fast retransmit. Moreover, we are even able to eliminate most of such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) * second order effects, if we apply some small "replay" window (~RTO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) * to timestamp space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) * All these measures still do not guarantee that we reject wrapped ACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) * on networks with high bandwidth, when sequence space is recycled quickly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) * but they do guarantee that such events will be very rare and will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) * affect the connection seriously. This doesn't look nice, but alas, PAWS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) * is a really buggy extension.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) * [ Later note. Even worse! It is buggy for segments _with_ data. The RFC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) * states that events where a retransmit arrives after the original data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) * rare. That is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) * the biggest problem on large power networks even with minor reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) * OK, let's give it a small replay window. If the peer clock ticks even at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) * 1 Hz, it is safe up to a bandwidth of 18 Gbit/sec. 8) ]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) */
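/* One plausible reading of the "18 Gbit/sec at 1 Hz" figure above
 * (a back-of-the-envelope sketch, not something this code depends on):
 * signed sequence-number comparison can only tell apart segments that are
 * within 2^31 bytes of each other, so an old duplicate becomes ambiguous
 * once more than 2^31 bytes have been sent while the peer's timestamp
 * clock has not ticked.  With a 1 Hz clock that allows up to
 * 2^31 bytes/sec ~= 17 Gbit/sec, roughly the quoted figure.
 */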
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) u32 seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) u32 ack = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) return (/* 1. Pure ACK with correct sequence number. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) /* 2. ... and duplicate ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) ack == tp->snd_una &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) /* 3. ... and does not update window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) /* 4. ... and sits in replay window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) }
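/* A note on units in check 4 above (an interpretation, not a guarantee
 * made by this file): icsk_rto is in jiffies, and (icsk_rto * 1024) / HZ
 * converts it to roughly milliseconds (times 1.024).  Comparing that
 * against a difference of peer timestamp ticks therefore assumes the
 * peer's timestamp clock runs at about 1 kHz, the common choice.  For
 * example, with HZ = 250 and icsk_rto = 50 jiffies (200 ms), the replay
 * window is 50 * 1024 / 250 = 204 peer ticks, i.e. about 200 ms.
 */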
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) static inline bool tcp_paws_discard(const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) !tcp_disordered_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) /* Check segment sequence number for validity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) * Segment controls are considered valid if the segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) * fits into the window after truncation to the window. Acceptability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) * of data (and SYN, FIN, of course) is checked separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) * See tcp_data_queue(), for example.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) * Also, controls (RST is the main one) are accepted using RCV.WUP instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) * of RCV.NXT. The peer still has not advanced its SND.UNA when we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) * delayed the ACK, so that his SND.UNA <= our RCV.WUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) * (borrowed from freebsd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) return !before(end_seq, tp->rcv_wup) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) }
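/* A worked example with made-up numbers: suppose rcv_wup = 1000,
 * rcv_nxt = 1200 and the right edge rcv_nxt + tcp_receive_window(tp)
 * is 1700.  A stale pure ACK with seq = end_seq = 900 is rejected
 * (its end_seq is before rcv_wup).  A segment covering 1600..1800 is
 * accepted here even though its tail exceeds the window; the excess is
 * trimmed later.  A segment starting at 1750 is rejected outright.
 */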
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) /* When we get a reset we do this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) void tcp_reset(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) trace_tcp_receive_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) /* We want the right error as BSD sees it (and indeed as we do). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) case TCP_SYN_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) sk->sk_err = ECONNREFUSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) sk->sk_err = EPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) case TCP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) sk->sk_err = ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) /* This barrier is coupled with smp_rmb() in tcp_poll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) tcp_write_queue_purge(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) * Process the FIN bit. This now behaves as it is supposed to work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) * and the FIN takes effect only when it is validly part of sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) * space, not earlier, while we still have holes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) * If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) * (and thence onto LAST-ACK and finally, CLOSE, we never enter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) * TIME-WAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) * If we are in FINWAIT-1, a received FIN indicates simultaneous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) * close and we go into CLOSING (and later onto TIME-WAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) void tcp_fin(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) sk->sk_shutdown |= RCV_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) sock_set_flag(sk, SOCK_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) case TCP_SYN_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) case TCP_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) /* Move to CLOSE_WAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) tcp_set_state(sk, TCP_CLOSE_WAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) inet_csk_enter_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) /* Received a retransmission of the FIN, do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) * nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) /* RFC793: Remain in the LAST-ACK state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) case TCP_FIN_WAIT1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) /* This case occurs when a simultaneous close
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) * happens: we must ACK the received FIN and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) * enter the CLOSING state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) tcp_set_state(sk, TCP_CLOSING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) case TCP_FIN_WAIT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) /* Received a FIN -- send ACK and enter TIME_WAIT. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) tcp_time_wait(sk, TCP_TIME_WAIT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) /* Only TCP_LISTEN and TCP_CLOSE are left; in these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) * cases we should never reach this piece of code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) pr_err("%s: Impossible, sk->sk_state=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) __func__, sk->sk_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) /* It _is_ possible that we have something out-of-order _after_ the FIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) * Probably we should reset in this case. For now, drop them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) skb_rbtree_purge(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) tcp_sack_reset(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) /* Do not send POLL_HUP for a half-duplex close. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) if (sk->sk_shutdown == SHUTDOWN_MASK ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) sk->sk_state == TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) if (before(seq, sp->start_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) sp->start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) if (after(end_seq, sp->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) sp->end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) }
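/* For example (illustrative numbers): with sp = {100, 200}, an incoming
 * block 150..300 overlaps, so sp becomes {100, 300} and we return true;
 * a block 200..260 merely abuts sp->end_seq and is merged the same way;
 * a block 250..300 starts after sp->end_seq, so we return false and the
 * caller has to record it as a separate SACK block.
 */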
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) if (before(seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) tp->rx_opt.dsack = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) tp->duplicate_sack[0].start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) tp->duplicate_sack[0].end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) if (!tp->rx_opt.dsack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) tcp_dsack_set(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) /* When the ACK path fails or drops most ACKs, the sender would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) * time out and spuriously retransmit the same segment repeatedly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) * The receiver remembers and reflects via DSACKs. Leverage the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) * DSACK state and change the txhash to re-route speculatively.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) sk_rethink_txhash(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) u32 end_seq = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) tcp_rcv_spurious_retrans(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) end_seq = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) /* These routines update the SACK block as out-of-order packets arrive or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) * in-order packets close up the sequence space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) struct tcp_sack_block *swalk = sp + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) /* See if the recent change to the first SACK eats into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) * or hits the sequence space of other SACK blocks; if so, coalesce.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) /* Zap SWALK by moving every further SACK up by one slot.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) * Decrease num_sacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) tp->rx_opt.num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) sp[i] = sp[i + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) this_sack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) swalk++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) static void tcp_sack_compress_send_ack(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) if (!tp->compressed_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) __sock_put(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) /* Since we finally have to send one ACK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) * subtract one from tp->compressed_ack to keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) * LINUX_MIB_TCPACKCOMPRESSED accurate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) tp->compressed_ack - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) tp->compressed_ack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) /* Reasonable number of SACK blocks to include in the TCP SACK option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) * The max is 4, but this becomes 3 if TCP timestamps are there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) * Given that SACK packets might be lost, be conservative and use 2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) #define TCP_SACK_BLOCKS_EXPECTED 2
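/* The 4-vs-3 arithmetic above (a sketch; the usual constants assumed):
 * TCP option space is at most 40 bytes and a SACK option takes
 * 2 + 8 * n bytes.  Alone, 2 + 8 * 4 = 34 <= 40, hence 4 blocks.  With
 * timestamps, whose 10 bytes are normally padded to 12, only 28 bytes
 * remain and 2 + 8 * 3 = 26 is the most that fits, hence 3 blocks.
 */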
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) int cur_sacks = tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) if (!cur_sacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) goto new_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) if (tcp_sack_extend(sp, seq, end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) tcp_sack_compress_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) /* Rotate this_sack to the first one. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) for (; this_sack > 0; this_sack--, sp--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) swap(*sp, *(sp - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) if (cur_sacks > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) tcp_sack_maybe_coalesce(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) tcp_sack_compress_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) /* Could not find an adjacent existing SACK, build a new one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) * put it at the front, and shift everyone else down. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) * always know there is at least one SACK present already here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) * If the sack array is full, forget about the last one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) if (this_sack >= TCP_NUM_SACKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) this_sack--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) tp->rx_opt.num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) sp--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) for (; this_sack > 0; this_sack--, sp--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) *sp = *(sp - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) new_sack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) /* Build the new head SACK, and we're done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) sp->start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) sp->end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) tp->rx_opt.num_sacks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) /* RCV.NXT advances, some SACKs should be eaten. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) static void tcp_sack_remove(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) int num_sacks = tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) tp->rx_opt.num_sacks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) for (this_sack = 0; this_sack < num_sacks;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) /* Check if the start of the sack is covered by RCV.NXT. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) if (!before(tp->rcv_nxt, sp->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) /* RCV.NXT must cover the whole block! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) WARN_ON(before(tp->rcv_nxt, sp->end_seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) /* Zap this SACK, by moving forward any other SACKS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) for (i = this_sack+1; i < num_sacks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) tp->selective_acks[i-1] = tp->selective_acks[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) this_sack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) sp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) tp->rx_opt.num_sacks = num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) /**
* tcp_try_coalesce - try to merge skb with the prior one
* @sk: socket
* @to: prior buffer
* @from: buffer to add to the queue
* @fragstolen: pointer to boolean
*
* Before queueing skb @from after @to, try to merge them
* to reduce overall memory use and queue lengths, if the cost is small.
* Packets in the ofo or receive queues can stay there a long time,
* so better coalesce them right now to avoid future collapses.
* Returns true if the caller should free @from instead of queueing it.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) static bool tcp_try_coalesce(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) struct sk_buff *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) int delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) *fragstolen = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580)
/* It's possible this segment overlaps with the prior segment in the queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) if (!mptcp_skb_can_collapse(to, from))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) if (from->decrypted != to->decrypted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) if (!skb_try_coalesce(to, from, fragstolen, &delta))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) atomic_add(delta, &sk->sk_rmem_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) sk_mem_charge(sk, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) if (TCP_SKB_CB(from)->has_rxtstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) TCP_SKB_CB(to)->has_rxtstamp = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) to->tstamp = from->tstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611)
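/* Like tcp_try_coalesce(), but also keeps to->gso_segs up to date so
* that drop accounting stays correct if the merged skb is dropped later.
*/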
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) static bool tcp_ooo_try_coalesce(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) struct sk_buff *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) bool res = tcp_try_coalesce(sk, to, from, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) /* In case tcp_drop() is called later, update to->gso_segs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) max_t(u16, 1, skb_shinfo(from)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) static void tcp_drop(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) trace_android_vh_kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) sk_drops_add(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635)
/* Check whether we can move data from the
* out_of_order queue into the receive_queue.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) static void tcp_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) __u32 dsack_high = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) bool fin, fragstolen, eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) struct sk_buff *skb, *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) p = rb_first(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) skb = rb_to_skb(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652)
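/* The skb overlaps data we have already received; report the
* duplicate range via D-SACK.
*/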
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) __u32 dsack = dsack_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) dsack_high = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) rb_erase(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) tail = skb_peek_tail(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) if (!eaten)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) __skb_queue_tail(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) if (unlikely(fin)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) tcp_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) /* tcp_fin() purges tp->out_of_order_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) * so we must end this loop right now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) static bool tcp_prune_ofo_queue(struct sock *sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) static int tcp_prune_queue(struct sock *sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688)
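/* Make sure the receive path can account for @size more bytes.
* First try the regular prune (clamp window, collapse queues); if that
* is not enough, keep pruning the out-of-order queue.
* Returns 0 on success, -1 if the caller should drop the skb.
*/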
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) unsigned int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) !sk_rmem_schedule(sk, skb, size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) if (tcp_prune_queue(sk) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) while (!sk_rmem_schedule(sk, skb, size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) if (!tcp_prune_ofo_queue(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) struct rb_node **p, *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) struct sk_buff *skb1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) u32 seq, end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) tcp_ecn_check_ce(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) /* Disable header prediction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) end_seq = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) p = &tp->out_of_order_queue.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) /* Initial out of order segment, build 1 SACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) tp->rx_opt.num_sacks = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) tp->selective_acks[0].start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) tp->selective_acks[0].end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) rb_link_node(&skb->rbnode, NULL, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) tp->ooo_last_skb = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(log N) rbtree lookup.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) coalesce_done:
/* For non-SACK flows, do not grow the window, to force
* DUPACKs and trigger fast retransmit.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) goto add_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) parent = &tp->ooo_last_skb->rbnode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) /* Find place to insert this segment. Handle overlaps on the way. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) skb1 = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) if (before(seq, TCP_SKB_CB(skb1)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) /* All the bits are present. Drop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) tcp_dsack_set(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) goto add_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) if (after(seq, TCP_SKB_CB(skb1)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) /* Partial overlap. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) /* skb's seq == skb1's seq and skb covers skb1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) * Replace skb1 with skb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) rb_replace_node(&skb1->rbnode, &skb->rbnode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) tcp_dsack_extend(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) tcp_drop(sk, skb1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) goto merge_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) } else if (tcp_ooo_try_coalesce(sk, skb1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) goto coalesce_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) insert:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) /* Insert segment into RB tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) rb_link_node(&skb->rbnode, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) merge_right:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) /* Remove other segments covered by skb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) while ((skb1 = skb_rb_next(skb)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) tcp_drop(sk, skb1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) }
/* If there is no skb after us, we are the new ooo_last_skb! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) if (!skb1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) tp->ooo_last_skb = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) add_sack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) tcp_sack_new_ofo_skb(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) if (skb) {
/* For non-SACK flows, do not grow the window, to force
* DUPACKs and trigger fast retransmit.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) skb_condense(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) skb_set_owner_r(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849)
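/* Queue an in-sequence skb at the tail of the receive queue, coalescing
* it with the current tail when possible, and advance rcv_nxt.
* Returns non-zero if the skb was merged; the caller must then free it.
*/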
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) int eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) eaten = (tail &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) tcp_try_coalesce(sk, tail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) skb, fragstolen)) ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) if (!eaten) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) __skb_queue_tail(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) skb_set_owner_r(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) return eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866)
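/* Feed data from user space straight into the receive queue, as if it
* had been received in sequence from the network (used by TCP repair).
*/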
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) int data_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) if (size == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876)
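/* Place at most MAX_SKB_FRAGS whole pages in page fragments and keep
* the sub-page remainder in the skb linear area.
*/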
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) if (size > PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) data_len = npages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) size = data_len + (size & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) skb = alloc_skb_with_frags(size - data_len, data_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) PAGE_ALLOC_COSTLY_ORDER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) &err, sk->sk_allocation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) skb_put(skb, size - data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) skb->data_len = data_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) skb->len = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) goto err_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) goto err_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) if (tcp_queue_rcv(sk, skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) WARN_ON_ONCE(fragstolen); /* should not happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) err_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918)
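/* Wake the reader, unless fewer than sk_rcvlowat bytes are available and
* we can still expect more data soon (no rmem pressure, not SOCK_DONE,
* and the receive window is larger than one rcv_mss).
*/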
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) void tcp_data_ready(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) int avail = tp->rcv_nxt - tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) !sock_flag(sk, SOCK_DONE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) int eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) if (sk_is_mptcp(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) mptcp_incoming_options(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) }
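/* Drop the dst reference and strip the TCP header; only the payload
* is queued.
*/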
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) skb_dst_drop(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) __skb_pull(skb, tcp_hdr(skb)->doff * 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) tp->rx_opt.dsack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949)
/* Queue data for delivery to the user.
* Packets in sequence go to the receive queue.
* Out-of-sequence packets go to the out_of_order_queue.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) if (tcp_receive_window(tp) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) /* Ok. In sequence. In window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) queue_and_out:
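/* Always accept the first skb into an empty receive queue, so that the
* flow cannot stall on memory accounting; otherwise enforce the normal
* rmem limits and drop on failure.
*/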
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) if (skb_queue_len(&sk->sk_receive_queue) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) sk_forced_mem_schedule(sk, skb->truesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) eaten = tcp_queue_rcv(sk, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) if (skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) tcp_event_data_recv(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) tcp_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) tcp_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978)
/* RFC 5681, section 4.2: SHOULD send an immediate ACK
* when a gap in the queue is filled.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) if (tp->rx_opt.num_sacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) tcp_sack_remove(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) tcp_fast_path_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) if (eaten > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) tcp_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) tcp_rcv_spurious_retrans(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) /* A retransmit, 2nd most common case. Force an immediate ack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) out_of_window:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) drop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011)
/* Out of window, e.g. a zero window probe. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet: seq < rcv_nxt < end_seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019)
/* If the window is closed, drop the tail of the packet, but only
* after remembering the D-SACK for its head, set up just above.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) if (!tcp_receive_window(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) goto queue_and_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) tcp_data_queue_ofo(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032)
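/* Return the skb following @skb, either from the linked @list or, when
* @list is NULL, from the out-of-order rbtree.
*/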
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) return !skb_queue_is_last(list, skb) ? skb->next : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) return skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040)
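/* Unlink @skb from @list (or from @root when collapsing the ofo rbtree),
* free it, and return its successor.
*/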
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) struct sk_buff_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) struct rb_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) struct sk_buff *next = tcp_skb_next(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) __skb_unlink(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) rb_erase(&skb->rbnode, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) struct rb_node **p = &root->rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) struct rb_node *parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) struct sk_buff *skb1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) skb1 = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) rb_link_node(&skb->rbnode, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) rb_insert_color(&skb->rbnode, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) /* Collapse contiguous sequence of skbs head..tail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) * sequence numbers start..end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) * If tail is NULL, this means until the end of the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) * Segments with FIN/SYN are not collapsed (only because this
* simplifies the code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) struct sk_buff *skb = head, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) struct sk_buff_head tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) bool end_of_skbs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092)
/* First, check that the queue is collapsible and find
* the point where collapsing can be useful.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) n = tcp_skb_next(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099)
/* No new bits? That is possible in the ofo queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) skb = tcp_collapse_one(sk, skb, list, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107)
/* The first skb to collapse is:
* - not SYN/FIN and
* - bloated, or contains data before "start", or
*   overlaps the next one and MPTCP allows collapsing.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) (tcp_win_from_space(sk, skb->truesize) > skb->len ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) before(TCP_SKB_CB(skb)->seq, start))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) end_of_skbs = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) end_of_skbs = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) /* Decided to skip this, advance start seq. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) start = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) if (end_of_skbs ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) __skb_queue_head_init(&tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) while (before(start, end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) struct sk_buff *nskb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) nskb = alloc_skb(copy, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) if (!nskb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) nskb->decrypted = skb->decrypted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) __skb_queue_before(list, skb, nskb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) skb_set_owner_r(nskb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) mptcp_skb_ext_move(nskb, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) /* Copy data, releasing collapsed skbs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) while (copy > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) int offset = start - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) int size = TCP_SKB_CB(skb)->end_seq - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) BUG_ON(offset < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) if (size > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) size = min(copy, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) TCP_SKB_CB(nskb)->end_seq += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) copy -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) start += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) skb = tcp_collapse_one(sk, skb, list, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) if (!skb ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) skb == tail ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) !mptcp_skb_can_collapse(nskb, skb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) if (skb->decrypted != nskb->decrypted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) skb_queue_walk_safe(&tmp, skb, n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) tcp_rbtree_insert(root, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187)
/* Collapse the ofo queue. Algorithm: select a contiguous sequence of skbs
* and tcp_collapse() them until the whole queue is collapsed.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) static void tcp_collapse_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) u32 range_truesize, sum_tiny = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) struct sk_buff *skb, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) u32 start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) skb = skb_rb_first(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) new_range:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) if (!skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) start = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) end = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) range_truesize = skb->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) for (head = skb;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) skb = skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) /* Range is terminated when we see a gap or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) * we are at the queue end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) if (!skb ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) after(TCP_SKB_CB(skb)->seq, end) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) before(TCP_SKB_CB(skb)->end_seq, start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) /* Do not attempt collapsing tiny skbs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) if (range_truesize != head->truesize ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) tcp_collapse(sk, NULL, &tp->out_of_order_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) head, skb, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) sum_tiny += range_truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) if (sum_tiny > sk->sk_rcvbuf >> 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) goto new_range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) range_truesize += skb->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) start = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) if (after(TCP_SKB_CB(skb)->end_seq, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) end = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237)
/*
* Clean the out-of-order queue to make room.
* We drop the packets with the highest sequences first, to:
* 1) give holes a chance to be filled;
* 2) avoid adding large latencies if thousands of packets sit there
*    (but if the application shrinks SO_RCVBUF, we could still end up
*    freeing the whole queue here);
* 3) drop at least 12.5 % of sk_rcvbuf to blunt malicious attacks.
*
* Return true if the queue has shrunk.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) static bool tcp_prune_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) struct rb_node *node, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) int goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
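/* Walk the ofo rbtree backwards from the highest sequence, dropping skbs
* until at least sk_rcvbuf/8 bytes of truesize have been freed, then
* reclaim memory and re-check the limits.
*/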
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) goal = sk->sk_rcvbuf >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) node = &tp->ooo_last_skb->rbnode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) prev = rb_prev(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) rb_erase(node, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) goal -= rb_to_skb(node)->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) tcp_drop(sk, rb_to_skb(node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) if (!prev || goal <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) !tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) goal = sk->sk_rcvbuf >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) node = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) } while (node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) tp->ooo_last_skb = rb_to_skb(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276)
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout-based retransmit. When a connection
* is in a sad state like this, we care only about the integrity
* of the connection, not performance.
*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) if (tp->rx_opt.sack_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) tcp_sack_reset(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) /* Reduce allocated memory if we can, trying to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) * the socket within its memory limits again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) * Return less than zero if we should start dropping frames
* until the socket-owning process reads some of the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) * to stabilize the situation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) static int tcp_prune_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) tcp_clamp_window(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) else if (tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) tcp_collapse_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) if (!skb_queue_empty(&sk->sk_receive_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) tcp_collapse(sk, &sk->sk_receive_queue, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) skb_peek(&sk->sk_receive_queue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318)
/* Collapsing did not help, destructive actions follow.
* This should never happen. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) tcp_prune_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) /* If we are really being abused, tell the caller to silently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) * drop receive data on the floor. It will get retransmitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) * and hopefully then we'll have sufficient space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) /* Massive buffer overcommit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) static bool tcp_should_expand_sndbuf(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) /* If the user specified a specific send buffer setting, do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) * not modify it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) /* If we are under global TCP memory pressure, do not expand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) if (tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) /* If we are under soft global TCP memory pressure, do not expand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) /* If we filled the congestion window, do not expand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) static void tcp_new_space(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) if (tcp_should_expand_sndbuf(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) tcp_sndbuf_expand(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) static void tcp_check_space(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) /* pairs with tcp_poll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) if (sk->sk_socket &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) tcp_new_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) static inline void tcp_data_snd_check(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) tcp_push_pending_frames(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) tcp_check_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) * Check if sending an ack is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) unsigned long rtt, delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) /* More than one full frame received... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) /* ... and right edge of window advances far enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) * (tcp_recvmsg() will send ACK otherwise).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) * If application uses SO_RCVLOWAT, we want to send an ACK now if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) * we have not received enough bytes to satisfy the condition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) __tcp_select_window(sk) >= tp->rcv_wnd)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) /* We ACK each frame or... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) tcp_in_quickack_mode(sk) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) /* Protocol state mandates a one-time immediate ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) send_now:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) tcp_send_delayed_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) if (!tcp_is_sack(tp) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) goto send_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427)
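/* SACK compression: the first TCP_FASTRETRANS_THRESH duplicate ACKs for
 * a given rcv_nxt are still sent immediately, so the peer's fast
 * retransmit logic is not delayed; only later duplicates are compressed.
 */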
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) tp->dup_ack_counter = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) tp->dup_ack_counter++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) goto send_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) tp->compressed_ack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) if (hrtimer_is_queued(&tp->compressed_ack_timer))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) /* Compress ack timer: 5% of RTT, but no more than tcp_comp_sack_delay_ns */
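/* Both rcv_rtt_est.rtt_us and srtt_us are stored left-shifted by 3
 * (i.e. in 1/8 usec units), so rtt * (NSEC_PER_USEC >> 3) / 20 works out
 * to roughly 5% of the RTT expressed in nanoseconds.
 */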
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) rtt = tp->rcv_rtt_est.rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) if (tp->srtt_us && tp->srtt_us < rtt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) rtt = tp->srtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) rtt * (NSEC_PER_USEC >> 3)/20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) sock_hold(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) HRTIMER_MODE_REL_PINNED_SOFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) static inline void tcp_ack_snd_check(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) if (!inet_csk_ack_scheduled(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) /* We sent a data segment already. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) __tcp_ack_snd_check(sk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) * This routine is only called when we have urgent data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) * signaled. It's the 'slow' part of tcp_urg. It could be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) * moved inline now as tcp_urg is only called from one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) * place. We handle urgent data the "wrong" way on purpose -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) * we have to, as BSD still doesn't use the correction from RFC961.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) * For 1003.1g we should support a new option TCP_STDURG to permit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) * either form (or just set the sysctl tcp_stdurg).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) u32 ptr = ntohs(th->urg_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477)
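/* th->urg_ptr is an offset from th->seq. With the default (BSD
 * compatible) interpretation it points one byte past the urgent byte,
 * hence the decrement below; sysctl_tcp_stdurg selects the
 * host-requirements interpretation, where it points at the urgent byte
 * itself.
 */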
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) ptr--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) ptr += ntohl(th->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) /* Ignore urgent data that we've already seen and read. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) if (after(tp->copied_seq, ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) /* Do not replay urg ptr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) * NOTE: interesting situation not covered by specs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) * A misbehaving sender may send an urg ptr pointing to a segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) * which we already have in the ofo queue. We are not able to fetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) * such data and will stay in TCP_URG_NOTYET until it is eaten
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) * by recvmsg(). It seems we are not obliged to handle such wicked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) * situations. But it is worth thinking about the possibility of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) * DoSes using some hypothetical application level deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) if (before(ptr, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) /* Do we already have a newer (or duplicate) urgent pointer? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) if (tp->urg_data && !after(ptr, tp->urg_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) /* Tell the world about our new urgent pointer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) sk_send_sigurg(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) /* We may be adding urgent data when the last byte read was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) * urgent. To do this requires some care. We cannot just ignore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) * tp->copied_seq since we would read the last urgent byte again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) * as data, nor can we alter copied_seq until this data arrives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) * or we break the semantics of SIOCATMARK (and thus sockatmark())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) * NOTE. Double Dutch. Rendering to plain English: the author of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) * comment above did something like send("A", MSG_OOB); send("B", MSG_OOB);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) * and expected both A and B to disappear from the stream. This is _wrong_.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) * Though this happens in BSD with high probability, it is not guaranteed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) * Any application relying on it is buggy. Note also that the fix "works"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) * only in this artificial test. Insert some normal data between A and B
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) * and we will diverge from BSD again. Verdict: it is better to remove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) * this to trap buggy users.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) tp->copied_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) __skb_unlink(skb, &sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) tp->urg_data = TCP_URG_NOTYET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) WRITE_ONCE(tp->urg_seq, ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) /* Disable header prediction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) /* This is the 'fast' part of urgent handling. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) /* Check if we get a new urgent pointer - normally not. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) if (th->urg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) tcp_check_urg(sk, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) /* Do we wait for any urgent data? - normally not... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) if (tp->urg_data == TCP_URG_NOTYET) {
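/* Convert the urgent sequence number into an offset from skb->data:
 * the skb still includes the TCP header at this point (hence the
 * th->doff * 4 term), and a SYN consumes a sequence number without
 * carrying payload.
 */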
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) th->syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) /* Is the urgent pointer pointing into this packet? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) if (ptr < skb->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) u8 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) if (skb_copy_bits(skb, ptr, &tmp, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) tp->urg_data = TCP_URG_VALID | tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) /* Accept RST for rcv_nxt - 1 after a FIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) * FIN is sent followed by a RST packet. The RST is sent with the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) * sequence number as the FIN, and thus according to RFC 5961 a challenge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) * ACK should be sent. However, Mac OSX rate limits replies to challenge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) * ACKs on the closed socket. In addition, middleboxes can drop either the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) * challenge ACK or a subsequent RST.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) (1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) TCPF_CLOSING));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) /* Does PAWS and seqno based validation of an incoming segment; the TCP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) * flags play a significant role here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) const struct tcphdr *th, int syn_inerr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) bool rst_seq_match = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) /* RFC1323: H1. Apply PAWS check first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) tcp_paws_discard(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) if (!th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) if (!tcp_oow_rate_limited(sock_net(sk), skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) LINUX_MIB_TCPACKSKIPPEDPAWS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) tcp_send_dupack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) /* Reset is accepted even if it did not pass PAWS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) /* Step 1: check sequence number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) /* RFC793, page 37: "In all states except SYN-SENT, all reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) * (RST) segments are validated by checking their SEQ-fields."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) * And page 69: "If an incoming segment is not acceptable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) * an acknowledgment should be sent in reply (unless the RST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) * bit is set, if so drop the segment and return)".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) if (!th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) if (th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) goto syn_challenge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) if (!tcp_oow_rate_limited(sock_net(sk), skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) LINUX_MIB_TCPACKSKIPPEDSEQ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) tcp_send_dupack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) } else if (tcp_reset_check(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) /* Step 2: check RST bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) /* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) * FIN and SACK too if available):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) * the right-most SACK block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) * then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) * RESET the connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) * else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) * Send a challenge ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) tcp_reset_check(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) rst_seq_match = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) int max_sack = sp[0].end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) ++this_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) max_sack = after(sp[this_sack].end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) max_sack) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) sp[this_sack].end_seq : max_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) if (TCP_SKB_CB(skb)->seq == max_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) rst_seq_match = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) if (rst_seq_match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) /* Disable TFO if RST is out-of-order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) * and no data has been received
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) * for current active TFO socket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) if (tp->syn_fastopen && !tp->data_segs_in &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) sk->sk_state == TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) tcp_fastopen_active_disable(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) /* step 3: check security and precedence [ignored] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) /* step 4: Check for a SYN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) * RFC 5961 4.2 : Send a challenge ack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) syn_challenge:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) if (syn_inerr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) bpf_skops_parse_hdr(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) * TCP receive function for the ESTABLISHED state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) * It is split into a fast path and a slow path. The fast path is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) * disabled when:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) * - A zero window was announced from us - zero window probing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) * is only handled properly in the slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) * - Out of order segments arrived.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) * - Urgent data is expected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) * - There is no buffer space left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) * - Unexpected TCP flags/window values/header lengths are received
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) * (detected by checking the TCP header against pred_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) * - Data is sent in both directions. Fast path only supports pure senders
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) * or pure receivers (this means either the sequence number or the ack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) * value must stay constant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) * - Unexpected TCP option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) * When the fast path conditions are not satisfied, it drops into a standard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) * receive procedure patterned after RFC793 to handle all cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) * The first three cases are guaranteed by proper pred_flags setting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) * the rest are checked inline. Fast processing is turned on in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) * tcp_data_queue when everything is OK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) const struct tcphdr *th = (const struct tcphdr *)skb->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) unsigned int len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) /* TCP congestion window tracking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) trace_tcp_probe(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) if (unlikely(!sk->sk_rx_dst))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) * Header prediction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) * The code loosely follows the one in the famous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) * "30 instruction TCP receive" Van Jacobson mail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) * Van's trick is to deposit buffers into socket queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) * on a device interrupt, to call tcp_recv function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) * on the receive process context and checksum and copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) * the buffer to user space. smart...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) * Our current scheme is not silly either but we take the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) * extra cost of the net_bh soft interrupt processing...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) * We do checksum and copy also but from device to kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) /* pred_flags is 0xS?10 << 16 + snd_wnd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) * if header_prediction is to be made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) * 'S' will always be tp->tcp_header_len >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) * turn it off (when there are holes in the receive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) * space for instance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) * PSH flag is ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) */
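/* For example, with timestamps negotiated tcp_header_len is 20 + 12 = 32
 * bytes, so 'S' is 8 and the predicted word is 0x8010 << 16 plus the
 * expected raw window field: doff = 8, only the ACK flag set.
 */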
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) int tcp_header_len = tp->tcp_header_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) /* Timestamp header prediction: tcp_header_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) * is automatically equal to th->doff*4 due to pred_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) * match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) /* Check timestamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) /* No? Slow path! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) if (!tcp_parse_aligned_timestamp(tp, th))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) goto slow_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) /* If PAWS failed, check it more carefully in slow path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) goto slow_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) /* DO NOT update ts_recent here, if checksum fails
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) * and timestamp was corrupted part, it will result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) * in a hung connection since we will drop all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) * future packets due to the PAWS test.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) if (len <= tcp_header_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) /* Bulk data transfer: sender */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) if (len == tcp_header_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) /* Predicted packet is in window by definition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) * seq == rcv_nxt and rcv_wup <= rcv_nxt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) * Hence, check seq<=rcv_wup reduces to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) if (tcp_header_len ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) tp->rcv_nxt == tp->rcv_wup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) /* We know that such packets are checksummed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) * on entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) tcp_ack(sk, skb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) /* When receiving a pure ACK in the fast path, update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) * last ts ecr directly instead of calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) * tcp_rcv_rtt_measure_ts()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) } else { /* Header too small */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) int eaten = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) bool fragstolen = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) if (tcp_checksum_complete(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) goto csum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816)
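/* Take the fast receive path only if the skb fits within the memory
 * already reserved for this socket (sk_forward_alloc); otherwise fall
 * through to the slow path, which can reserve more memory or prune the
 * receive queues.
 */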
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) if ((int)skb->truesize > sk->sk_forward_alloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) goto step5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) /* Predicted packet is in window by definition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) * seq == rcv_nxt and rcv_wup <= rcv_nxt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) * Hence, check seq<=rcv_wup reduces to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) if (tcp_header_len ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) tp->rcv_nxt == tp->rcv_wup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) tcp_rcv_rtt_measure_ts(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) /* Bulk data transfer: receiver */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) __skb_pull(skb, tcp_header_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) eaten = tcp_queue_rcv(sk, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) tcp_event_data_recv(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) /* Well, only one small jumplet in fast path... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) tcp_ack(sk, skb, FLAG_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) if (!inet_csk_ack_scheduled(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) goto no_ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) __tcp_ack_snd_check(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) no_ack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) if (eaten)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) tcp_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) slow_path:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) if (len < (th->doff << 2) || tcp_checksum_complete(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) goto csum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861)
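/* After the handshake every segment that is neither a SYN nor a RST is
 * required to carry an ACK (RFC 793), so silently drop anything else.
 */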
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) if (!th->ack && !th->rst && !th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) * Standard slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) if (!tcp_validate_incoming(sk, skb, th, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) step5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) tcp_rcv_rtt_measure_ts(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) /* Process urgent data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) /* step 7: process the segment text */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) tcp_data_queue(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) tcp_ack_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) csum_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) EXPORT_SYMBOL(tcp_rcv_established);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) tcp_mtup_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) icsk->icsk_af_ops->rebuild_header(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) tcp_init_metrics(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) /* Initialize the congestion window to start the transfer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) * retransmitted. In light of RFC6298's more aggressive 1 sec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) * initRTO, we only reset cwnd when more than one SYN/SYN-ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) * retransmission has occurred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) if (tp->total_retrans > 1 && tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) tp->snd_cwnd = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) bpf_skops_established(sk, bpf_op, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) /* Initialize congestion control unless BPF initialized it already: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) if (!icsk->icsk_ca_initialized)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) tcp_init_congestion_control(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) tcp_init_buffer_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) tcp_set_state(sk, TCP_ESTABLISHED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) icsk->icsk_ack.lrcvtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) security_inet_conn_established(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) sk_mark_napi_id(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) /* Prevent spurious tcp_cwnd_restart() on first data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) * packet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) tp->lsndtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) if (sock_flag(sk, SOCK_KEEPOPEN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948)
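/* pred_flags embeds the expected raw window field, and the window
 * advertised in a SYN/SYN-ACK is never scaled. So if the peer negotiated
 * window scaling, presumably we must leave the fast path off for now;
 * tcp_fast_path_check() re-enables it once a properly scaled window
 * update has been processed.
 */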
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) if (!tp->rx_opt.snd_wscale)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) __tcp_fast_path_on(tp, tp->snd_wnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) struct tcp_fastopen_cookie *cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) bool syn_drop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) if (mss == tp->rx_opt.user_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) struct tcp_options_received opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) /* Get original SYNACK MSS value if user MSS sets mss_clamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) tcp_clear_options(&opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) opt.user_mss = opt.mss_clamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) mss = opt.mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) if (!tp->syn_fastopen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) /* Ignore an unsolicited cookie */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) cookie->len = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) } else if (tp->total_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) /* SYN timed out and the SYN-ACK neither has a cookie nor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) * acknowledges data. Presumably the remote received only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) * the retransmitted (regular) SYNs: either the original
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) * SYN-data or the corresponding SYN-ACK was dropped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) syn_drop = (cookie->len < 0 && data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) } else if (cookie->len < 0 && !tp->syn_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) /* We requested a cookie but didn't get it. If we did not use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) * the (old) exp opt format then try it next time (try_exp=1).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) * Otherwise we go back to using the RFC7413 opt (try_exp=2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) try_exp = tp->syn_fastopen_exp ? 2 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) if (data) { /* Retransmit unacked data in SYN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) if (tp->total_retrans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) skb_rbtree_walk_from(data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) if (__tcp_retransmit_skb(sk, data, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) LINUX_MIB_TCPFASTOPENACTIVEFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) tp->syn_data_acked = tp->syn_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) if (tp->syn_data_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) /* SYN-data is counted as two separate packets in tcp_ack() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) if (tp->delivered > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) --tp->delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) tcp_fastopen_add_skb(sk, synack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) static void smc_check_reset_syn(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) if (static_branch_unlikely(&tcp_have_smc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) if (tp->syn_smc && !tp->rx_opt.smc_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) tp->syn_smc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) static void tcp_try_undo_spurious_syn(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) u32 syn_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) /* undo_marker is set when SYN or SYNACK times out. The timeout is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) * spurious if the ACK's timestamp option echo value matches the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) * original SYN timestamp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) syn_stamp = tp->retrans_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) syn_stamp == tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) struct tcp_fastopen_cookie foc = { .len = -1 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) int saved_clamp = tp->rx_opt.mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) bool fastopen_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
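/* Our outgoing timestamps carry tp->tsoffset, so strip it from the
 * echoed value before comparing it with locally generated
 * tcp_time_stamp() values such as retrans_stamp below.
 */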
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) tp->rx_opt.rcv_tsecr -= tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) if (th->ack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) /* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) * "If the state is SYN-SENT then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) * first check the ACK bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) * If the ACK bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) * a reset (unless the RST bit is set, if so drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) * the segment and return)"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) /* Previous FIN/ACK or RST/ACK might be ignored. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) if (icsk->icsk_retransmits == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) inet_csk_reset_xmit_timer(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) ICSK_TIME_RETRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) TCP_TIMEOUT_MIN, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) goto reset_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) tcp_time_stamp(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) LINUX_MIB_PAWSACTIVEREJECTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) goto reset_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) /* Now ACK is acceptable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) * "If the RST bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) * If the ACK was acceptable then signal the user "error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) * connection reset", drop the segment, enter CLOSED state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) * delete TCB, and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) /* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) * "fifth, if neither of the SYN or RST bits is set then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) * drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) * See note below!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) * --ANK(990513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) if (!th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) /* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) * "If the SYN bit is on ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) * are acceptable then ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) * (our SYN has been ACKed), change the connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) * state to ESTABLISHED..."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) tcp_ecn_rcv_synack(tp, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) tcp_try_undo_spurious_syn(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) tcp_ack(sk, skb, FLAG_SLOWPATH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) /* OK, it's good. Set up the sequence numbers and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) * move to ESTABLISHED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) /* RFC1323: The window in SYN & SYN/ACK segments is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) * never scaled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) tp->snd_wnd = ntohs(th->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) if (!tp->rx_opt.wscale_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) tp->window_clamp = min(tp->window_clamp, 65535U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) if (tp->rx_opt.saw_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) tp->rx_opt.tstamp_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) tp->tcp_header_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) tp->tcp_header_len = sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) /* Remember, tcp_poll() does not lock the socket!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) * Change state from SYN-SENT only after copied_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) * has been initialized. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) smc_check_reset_syn(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) tcp_finish_connect(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160)
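/* If this was a Fast Open attempt (SYN carried a cookie request or data),
 * let tcp_rcv_fastopen_synack() check what the SYN-ACK acknowledged; it
 * returns true when the attempt failed and data from the SYN has to be
 * retransmitted.
 */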
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) tcp_rcv_fastopen_synack(sk, skb, &foc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) if (fastopen_fail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) if (sk->sk_write_pending ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) icsk->icsk_accept_queue.rskq_defer_accept ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) inet_csk_in_pingpong_mode(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) /* Save one ACK. Data will be ready after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) * several ticks, if write_pending is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) * This could be removed, but with this feature tcpdump
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) * traces look so _wonderfully_ clever that I was not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) * able to resist the temptation 8) --ANK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) TCP_DELACK_MAX, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) /* No ACK in the segment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) /* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) * "If the RST bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) * Otherwise (no ACK) drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) /* PAWS check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) tcp_paws_reject(&tp->rx_opt, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) /* We see a SYN without an ACK. It is an attempt at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) * simultaneous connect with crossed SYNs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) * In particular, it can be a connect to self.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) tcp_set_state(sk, TCP_SYN_RECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) if (tp->rx_opt.saw_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) tp->rx_opt.tstamp_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) tp->tcp_header_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) tp->tcp_header_len = sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) /* RFC1323: The window in SYN & SYN/ACK segments is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) * never scaled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) tp->snd_wnd = ntohs(th->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) tp->max_window = tp->snd_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) tcp_ecn_rcv_syn(tp, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) tcp_mtup_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) tcp_send_synack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) /* Note, we could accept data and URG from this segment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) * There is no obstacle to doing so (except that we must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) * either change tcp_recvmsg() to prevent it from returning data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) * before the 3WHS completes per RFC793, or employ TCP Fast Open).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) * However, if we sometimes ignore data in ACKless segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) * we have no reason to accept it at other times.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) * Also, the code doing this in step6 of tcp_rcv_state_process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) * does not seem flawless. So, discard the packet for sanity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) * Uncomment this return to process the data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) /* "fifth, if neither of the SYN or RST bits is set then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) * drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) discard_and_undo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) tcp_clear_options(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) tp->rx_opt.mss_clamp = saved_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) reset_and_undo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) tcp_clear_options(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) tp->rx_opt.mss_clamp = saved_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) * undo. If peer SACKs triggered fast recovery, we can't undo here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) tcp_try_undo_loss(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) /* Reset rtx states to prevent spurious retransmits_timed_out() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) tcp_sk(sk)->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) inet_csk(sk)->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) * we no longer need req so release it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) lockdep_sock_is_held(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) reqsk_fastopen_remove(sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) /* Re-arm the timer because data may have been sent out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) * This is similar to the regular data transmission case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) * when new data has just been ack'ed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) * (TFO) - we could try to be more aggressive and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) * retransmit any data sooner based on when it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) * was sent out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) * This function implements the receiving procedure of RFC 793 for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) * all states except ESTABLISHED and TIME_WAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) * It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) * address independent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) int queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) bool acceptable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) case TCP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) case TCP_LISTEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) if (th->ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) if (th->rst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) if (th->fin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) /* It is possible that we process SYN packets from the backlog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) * so make sure BH is disabled and the RCU read lock is held here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) if (!acceptable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) consume_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) case TCP_SYN_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) queued = tcp_rcv_synsent_state_process(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) if (queued >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) return queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) /* Do step6 onward by hand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368)
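/* The remaining states (SYN-RECV, FIN-WAIT-1/2, CLOSING, CLOSE-WAIT,
 * LAST-ACK) share the common path below: refresh the timestamp clock, let
 * a pending Fast Open request sock validate the segment, then run the
 * generic sequence/RST/ACK checks.
 */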
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) req = rcu_dereference_protected(tp->fastopen_rsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) lockdep_sock_is_held(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) bool req_stolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) sk->sk_state != TCP_FIN_WAIT1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) if (!tcp_check_req(sk, skb, req, true, &req_stolen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382)
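/* A segment with none of ACK, RST or SYN set carries nothing we can act
 * on in these states, so drop it without further checks.
 */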
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) if (!th->ack && !th->rst && !th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) if (!tcp_validate_incoming(sk, skb, th, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) /* step 5: check the ACK field */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) FLAG_UPDATE_TS_RECENT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) FLAG_NO_CHALLENGE_ACK) > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393)
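/* FLAG_NO_CHALLENGE_ACK above means tcp_ack() leaves the response to an
 * unacceptable ACK to us: in SYN-RECV we answer with a reset (return 1),
 * in any other state we send a rate-limited challenge ACK per RFC 5961
 * and drop the segment.
 */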
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) if (!acceptable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) if (sk->sk_state == TCP_SYN_RECV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) return 1; /* send one RST */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) case TCP_SYN_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) if (!tp->srtt_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) tcp_synack_rtt_meas(sk, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405)
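/* req is only non-NULL for a Fast Open server socket whose request_sock
 * is still attached; a normal passive open (or a crossed-SYN connect)
 * finishes the handshake in the else branch.
 */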
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) tcp_rcv_synrecv_state_fastopen(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) tcp_try_undo_spurious_syn(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) tcp_set_state(sk, TCP_ESTABLISHED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) /* Note that this wakeup is only for the marginal crossed-SYN case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) * Passively opened sockets are not woken up, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) * sk->sk_sleep == NULL and sk->sk_socket == NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) if (sk->sk_socket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425)
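/* Unlike the window in SYN and SYN-ACK segments, the window in this final
 * ACK of the handshake is subject to the negotiated scale factor.
 */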
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) if (tp->rx_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) if (!inet_csk(sk)->icsk_ca_ops->cong_control)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) tcp_update_pacing_rate(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) /* Prevent spurious tcp_cwnd_restart() on first data packet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) tp->lsndtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) tcp_fast_path_on(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) case TCP_FIN_WAIT1: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) int tmo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) tcp_rcv_synrecv_state_fastopen(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) if (tp->snd_una != tp->write_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) tcp_set_state(sk, TCP_FIN_WAIT2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) sk->sk_shutdown |= SEND_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) sk_dst_confirm(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) /* Wake up lingering close() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) if (tp->linger2 < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) /* Receive out of order FIN after close() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) if (tp->syn_fastopen && th->fin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) tcp_fastopen_active_disable(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477)
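/* Decide how to wait for the peer's FIN in FIN-WAIT-2: keep the full
 * socket on a keepalive timer while the linger time exceeds the TIME-WAIT
 * period (or when a FIN is already here or the socket is owned by the
 * user), otherwise hand over to a lightweight timewait sock for the
 * remaining time.
 */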
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) tmo = tcp_fin_time(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) if (tmo > TCP_TIMEWAIT_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) } else if (th->fin || sock_owned_by_user(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) /* Bad case. We could lose such a FIN otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) * It is not a big problem, but it looks confusing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) * and is not such a rare event. We can still lose it now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) * if it spins in bh_lock_sock(), but that is a really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) * marginal case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) inet_csk_reset_keepalive_timer(sk, tmo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) if (tp->snd_una == tp->write_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) tcp_time_wait(sk, TCP_TIME_WAIT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) if (tp->snd_una == tp->write_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) tcp_update_metrics(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) /* step 6: check the URG bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) /* step 7: process the segment text */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) if (sk_is_mptcp(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) mptcp_incoming_options(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) case TCP_FIN_WAIT1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) case TCP_FIN_WAIT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) /* RFC 793 says to queue data in these states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) * but RFC 1122 says we MUST send a reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) * 4.4BSD also sends a reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) if (sk->sk_shutdown & RCV_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) case TCP_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) tcp_data_queue(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) /* tcp_data_queue() could have moved the socket to TIME-WAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) if (sk->sk_state != TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) tcp_ack_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) if (!queued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) EXPORT_SYMBOL(tcp_rcv_state_process);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) if (family == AF_INET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) net_dbg_ratelimited("drop open request from %pI4/%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) &ireq->ir_rmt_addr, port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) else if (family == AF_INET6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) net_dbg_ratelimited("drop open request from %pI6/%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) &ireq->ir_v6_rmt_addr, port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) /* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) * If we receive a SYN packet with these bits set, it means a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) * network is playing bad games with TOS bits. In order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) * avoid possible false congestion notifications, we disable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580) * TCP ECN negotiation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) * Exception: tcp_ca wants ECN. This is required for DCTCP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) * congestion control: Linux DCTCP asserts ECT on all packets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) * including SYN, which is the optimal solution; however,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585) * others, such as FreeBSD, do not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) * Exception: At least one of the reserved bits of the TCP header (th->res1) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) * set, indicating the use of a future TCP extension (such as AccECN). See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) * RFC8311 §4.3 which updates RFC3168 to allow the development of such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) * extensions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) static void tcp_ecn_create_request(struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) const struct sock *listen_sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) const struct dst_entry *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) const struct net *net = sock_net(listen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) bool th_ecn = th->ece && th->cwr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) bool ect, ecn_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) u32 ecn_ok_dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) if (!th_ecn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609)
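/* Negotiate ECN when the SYN looks sane (not ECT-marked, or using the
 * reserved header bits per RFC 8311) and ECN is enabled by sysctl or by
 * the route, or when the congestion control module (kernel or BPF)
 * explicitly asks for ECN.
 */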
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) tcp_bpf_ca_needs_ecn((struct sock *)req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) inet_rsk(req)->ecn_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) static void tcp_openreq_init(struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) const struct tcp_options_received *rx_opt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) struct sk_buff *skb, const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) tcp_rsk(req)->snt_synack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) tcp_rsk(req)->last_oow_ack_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) req->mss = rx_opt->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) ireq->tstamp_ok = rx_opt->tstamp_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) ireq->sack_ok = rx_opt->sack_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) ireq->snd_wscale = rx_opt->snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) ireq->wscale_ok = rx_opt->wscale_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) ireq->acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) ireq->ecn_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) ireq->ir_rmt_port = tcp_hdr(skb)->source;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) ireq->ir_mark = inet_request_mark(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) ireq->smc_ok = rx_opt->smc_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) struct sock *sk_listener,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) bool attach_listener)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) struct request_sock *req = reqsk_alloc(ops, sk_listener,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) attach_listener);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) ireq->ireq_opt = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) ireq->pktopts = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) atomic64_set(&ireq->ir_cookie, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) ireq->ireq_state = TCP_NEW_SYN_RECV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) write_pnet(&ireq->ireq_net, sock_net(sk_listener));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) ireq->ireq_family = sk_listener->sk_family;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) EXPORT_SYMBOL(inet_reqsk_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) * Return true if a syncookie should be sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) const char *msg = "Dropping request";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) bool want_cookie = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) #ifdef CONFIG_SYN_COOKIES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) if (net->ipv4.sysctl_tcp_syncookies) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) msg = "Sending cookies";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) want_cookie = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) if (!queue->synflood_warned &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) net->ipv4.sysctl_tcp_syncookies != 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) xchg(&queue->synflood_warned, 1) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) proto, sk->sk_num, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) return want_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) static void tcp_reqsk_record_syn(const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) if (tcp_sk(sk)->save_syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) struct saved_syn *saved_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) u32 mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) void *base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) if (tcp_sk(sk)->save_syn == 2) { /* Save full header. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) base = skb_mac_header(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) mac_hdrlen = skb_mac_header_len(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) len += mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) base = skb_network_header(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) mac_hdrlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) saved_syn = kmalloc(struct_size(saved_syn, data, len),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) if (saved_syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) saved_syn->mac_hdrlen = mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) saved_syn->network_hdrlen = skb_network_header_len(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) memcpy(saved_syn->data, base, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) req->saved_syn = saved_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) /* If a SYN cookie is required and supported, returns a clamped MSS value to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) * used for SYN cookie generation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730) const struct tcp_request_sock_ops *af_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) struct sock *sk, struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) u16 mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735)
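/* Cookies are only generated when they are forced on (sysctl mode 2) or
 * the SYN backlog is already full; otherwise signal the caller to take
 * the normal path by returning 0.
 */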
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) !inet_csk_reqsk_queue_is_full(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) if (sk_acceptq_is_full(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) if (!mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) mss = af_ops->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) int tcp_conn_request(struct request_sock_ops *rsk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) const struct tcp_request_sock_ops *af_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) struct tcp_fastopen_cookie foc = { .len = -1 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) struct tcp_options_received tmp_opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) struct sock *fastopen_sk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) bool want_cookie = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) struct dst_entry *dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) struct flowi fl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) /* TW buckets are converted to open requests without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) * limitation: they conserve resources and the peer is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) * evidently a real one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) inet_csk_reqsk_queue_is_full(sk)) && !isn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) if (!want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) if (sk_acceptq_is_full(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) req->syncookie = want_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) tcp_rsk(req)->af_specific = af_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) tcp_rsk(req)->ts_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) #if IS_ENABLED(CONFIG_MPTCP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) tcp_rsk(req)->is_mptcp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797)
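/* Parse the options on the SYN. When answering with a syncookie we are
 * not interested in a Fast Open cookie, and without a timestamp no other
 * options can be encoded in the cookie either.
 */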
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) tcp_clear_options(&tmp_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) tmp_opt.mss_clamp = af_ops->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) tmp_opt.user_mss = tp->rx_opt.user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) want_cookie ? NULL : &foc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) if (want_cookie && !tmp_opt.saw_tstamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) tcp_clear_options(&tmp_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) if (IS_ENABLED(CONFIG_SMC) && want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) tmp_opt.smc_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) tcp_openreq_init(req, &tmp_opt, skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812) inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) /* Note: tcp_v6_init_req() might override ir_iif for link locals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) af_ops->init_req(req, sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) if (security_inet_conn_request(sk, skb, req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820) goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) if (tmp_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) dst = af_ops->route_req(sk, &fl, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) if (!dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) if (!want_cookie && !isn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) /* Kill the following clause if you dislike this approach. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) if (!net->ipv4.sysctl_tcp_syncookies &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) !tcp_peer_is_proven(req, dst)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) /* Without syncookies, the last quarter of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) * backlog is filled only with destinations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) * proven to be alive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) * It means that we continue to communicate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) * with destinations already remembered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) * at the moment the synflood started.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) rsk_ops->family);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) goto drop_and_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) isn = af_ops->init_seq(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) tcp_ecn_create_request(req, skb, sk, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851)
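/* For a syncookie the ISN itself encodes the connection parameters; ECN
 * state can only be kept if the client sent a timestamp option to carry
 * it, so clear ecn_ok otherwise.
 */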
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) if (want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) if (!tmp_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) inet_rsk(req)->ecn_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) tcp_rsk(req)->snt_isn = isn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) tcp_rsk(req)->txhash = net_tx_rndhash();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) tcp_openreq_init_rwin(req, sk, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) sk_rx_queue_set(req_to_sk(req), skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) if (!want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) tcp_reqsk_record_syn(sk, req, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) }
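/* tcp_try_fastopen() may have created a full child socket already if the
 * SYN carried a valid TFO cookie and data; in that case the SYN-ACK is
 * sent from the child and the child goes straight onto the accept queue.
 */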
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) if (fastopen_sk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) af_ops->send_synack(fastopen_sk, dst, &fl, req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) &foc, TCP_SYNACK_FASTOPEN, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) /* Add the child socket directly into the accept queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) reqsk_fastopen_remove(fastopen_sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873) bh_unlock_sock(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) sock_put(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) bh_unlock_sock(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) sock_put(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) tcp_rsk(req)->tfo_listener = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) if (!want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) inet_csk_reqsk_queue_hash_add(sk, req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) tcp_timeout_init((struct sock *)req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) af_ops->send_synack(sk, dst, &fl, req, &foc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) !want_cookie ? TCP_SYNACK_NORMAL :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) TCP_SYNACK_COOKIE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889) if (want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) reqsk_free(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) reqsk_put(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) drop_and_release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) dst_release(dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) drop_and_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) __reqsk_free(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) drop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) tcp_listendrop(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905) EXPORT_SYMBOL(tcp_conn_request);