Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * INET		An implementation of the TCP/IP protocol suite for the LINUX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *		operating system.  INET is implemented using the  BSD Socket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *		interface as the means of communication with the user level.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  *		Implementation of the Transmission Control Protocol(TCP).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * Authors:	Ross Biro
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  *		Florian La Roche, <flla@stud.uni-sb.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  *		Linus Torvalds, <torvalds@cs.helsinki.fi>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  *		Matthew Dillon, <dillon@apollo.west.oic.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  *		Jorge Cwik, <jorge@laser.satlink.net>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23)  * Changes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24)  *		Pedro Roque	:	Fast Retransmit/Recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25)  *					Two receive queues.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  *					Retransmit queue handled by TCP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27)  *					Better retransmit timer handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  *					New congestion avoidance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29)  *					Header prediction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30)  *					Variable renaming.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  *		Eric		:	Fast Retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  *		Randy Scott	:	MSS option defines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  *		Eric Schenk	:	Fixes to slow start algorithm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  *		Eric Schenk	:	Yet another double ACK bug.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  *		Eric Schenk	:	Delayed ACK bug fixes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  *		Eric Schenk	:	Floyd style fast retrans war avoidance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  *		David S. Miller	:	Don't allow zero congestion window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  *		Eric Schenk	:	Fix retransmitter so that it sends
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  *					next packet on ack of previous packet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  *		Andi Kleen	:	Moved open_request checking here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42)  *					and process RSTs for open_requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43)  *		Andi Kleen	:	Better prune_queue, and other fixes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  *		Andrey Savochkin:	Fix RTT measurements in the presence of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  *					timestamps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  *		Andrey Savochkin:	Check sequence numbers correctly when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  *					removing SACKs due to in sequence incoming
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  *					data segments.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  *		Andi Kleen:		Make sure we never ack data there is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  *					enough room for. Also make this condition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51)  *					a fatal error if it might still happen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  *		Andi Kleen:		Add tcp_measure_rcv_mss to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53)  *					connections with MSS<min(MTU,ann. MSS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  *					work without delayed acks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55)  *		Andi Kleen:		Process packets with PSH set in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56)  *					fast path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57)  *		J Hadi Salim:		ECN support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58)  *	 	Andrei Gurtov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59)  *		Pasi Sarolahti,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60)  *		Panu Kuhlberg:		Experimental audit of TCP (re)transmission
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61)  *					engine. Lots of bugs are found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62)  *		Pasi Sarolahti:		F-RTO for dealing with spurious RTOs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) #define pr_fmt(fmt) "TCP: " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) #include <linux/prefetch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) #include <net/dst.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) #include <net/tcp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) #include <net/inet_common.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) #include <linux/ipsec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) #include <asm/unaligned.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) #include <linux/errqueue.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) #include <trace/events/tcp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) #include <linux/jump_label_ratelimit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) #include <net/busy_poll.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) #include <net/mptcp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) #include <trace/hooks/net.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) #define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) #define FLAG_RETRANS_DATA_ACKED	0x08 /* "" "" some of which was retransmitted.	*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) #define FLAG_ECE		0x40 /* ECE in this ACK				*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) #define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) #define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) #define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
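/*
 * Editor's sketch, not part of the kernel source: how the FLAG_* bits above
 * are meant to be used.  tcp_ack() accumulates them into a plain "int flag"
 * for one incoming ACK, and later code tests the composite masks.  The
 * helper name below is hypothetical; it only illustrates the pattern.
 */
static inline int example_classify_ack(int flag)
{
	if (!(flag & FLAG_NOT_DUP))	/* no data, no window update, nothing newly acked */
		return 0;		/* looks like a duplicate ACK */
	if (flag & FLAG_CA_ALERT)	/* SACK / ECE / D-SACK: congestion-control relevant */
		return 1;
	return 2;			/* ordinary forward-progress ACK */
}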
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) #define REXMIT_NONE	0 /* no loss recovery to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) #define REXMIT_LOST	1 /* retransmit packets marked lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) #define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) #if IS_ENABLED(CONFIG_TLS_DEVICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) void clean_acked_data_enable(struct inet_connection_sock *icsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 			     void (*cad)(struct sock *sk, u32 ack_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	icsk->icsk_clean_acked = cad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	static_branch_deferred_inc(&clean_acked_data_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) EXPORT_SYMBOL_GPL(clean_acked_data_enable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) void clean_acked_data_disable(struct inet_connection_sock *icsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	static_branch_slow_dec_deferred(&clean_acked_data_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	icsk->icsk_clean_acked = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) EXPORT_SYMBOL_GPL(clean_acked_data_disable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) void clean_acked_data_flush(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	static_key_deferred_flush(&clean_acked_data_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) EXPORT_SYMBOL_GPL(clean_acked_data_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) #ifdef CONFIG_CGROUP_BPF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	bool unknown_opt = tcp_sk(sk)->rx_opt.saw_unknown &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 		BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 				       BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	bool parse_all_opt = BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 						    BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	struct bpf_sock_ops_kern sock_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	if (likely(!unknown_opt && !parse_all_opt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	/* The skb will be handled in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	 * bpf_skops_established() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 	 * bpf_skops_write_hdr_opt().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	case TCP_SYN_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	case TCP_SYN_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	case TCP_LISTEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 	sock_owned_by_me(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	sock_ops.is_fullsock = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	sock_ops.sk = sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) static void bpf_skops_established(struct sock *sk, int bpf_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 				  struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	struct bpf_sock_ops_kern sock_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	sock_owned_by_me(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	sock_ops.op = bpf_op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	sock_ops.is_fullsock = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	sock_ops.sk = sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	/* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	if (skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 		bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) static void bpf_skops_established(struct sock *sk, int bpf_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 				  struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 			     unsigned int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	static bool __once __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	if (!__once) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 		struct net_device *dev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 		__once = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 		if (!dev || len >= dev->mtu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 				dev ? dev->name : "Unknown driver");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) /* Adapt the MSS value used to make delayed ack decision to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225)  * real world.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	const unsigned int lss = icsk->icsk_ack.last_seg_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	unsigned int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	icsk->icsk_ack.last_seg_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	/* skb->len may jitter because of SACKs, even if peer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	 * sends good full-sized frames.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	len = skb_shinfo(skb)->gso_size ? : skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	if (len >= icsk->icsk_ack.rcv_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 					       tcp_sk(sk)->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 		/* Account for possibly-removed options */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 		if (unlikely(len > icsk->icsk_ack.rcv_mss +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 				   MAX_TCP_OPTION_SPACE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 			tcp_gro_dev_warn(sk, skb, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 		/* Otherwise, we make a more careful check, taking into account
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 		 * that the SACK block size is variable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 		 * "len" is invariant segment length, including TCP header.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 		len += skb->data - skb_transport_header(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 		    /* If PSH is not set, the packet should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 		     * full-sized, provided the peer TCP is not badly broken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 		     * This observation (if it is correct 8)) allows us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		     * to handle super-low MTU links fairly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 		     */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 			/* Subtract also invariant (if peer is RFC compliant),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 			 * tcp header plus fixed timestamp option length.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 			 * Resulting "len" is MSS free of SACK jitter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 			len -= tcp_sk(sk)->tcp_header_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 			icsk->icsk_ack.last_seg_size = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 			if (len == lss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 				icsk->icsk_ack.rcv_mss = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 		if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 		icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) }
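/*
 * Editor's note, not part of the kernel source: a worked example of the
 * "invariant length" computed in the slow branch above, assuming a peer
 * that uses timestamps (tcp_header_len = 20 + 12 = 32) and sends a
 * 1448-byte payload:
 *
 *	len  = skb->len + (skb->data - skb_transport_header(skb))
 *	     = 1448 + 32 = 1480		(payload plus full TCP header)
 *	len -= tp->tcp_header_len	->  1448, cached in last_seg_size
 *
 * Seeing the same value twice in a row (len == lss) promotes it to
 * rcv_mss, so SACK-induced length jitter does not disturb the estimate.
 */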
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	if (quickacks == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 		quickacks = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	quickacks = min(quickacks, max_quickacks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	if (quickacks > icsk->icsk_ack.quick)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 		icsk->icsk_ack.quick = quickacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	tcp_incr_quickack(sk, max_quickacks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	inet_csk_exit_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 	icsk->icsk_ack.ato = TCP_ATO_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) EXPORT_SYMBOL(tcp_enter_quickack_mode);
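/*
 * Editor's note, not part of the kernel source: the quick-ACK budget sized
 * by tcp_incr_quickack() above, for an assumed rcv_wnd of 65535 bytes and
 * rcv_mss of 1460 bytes:
 *
 *	quickacks = 65535 / (2 * 1460) = 22   (then clamped to max_quickacks)
 *
 * i.e. roughly one receive window's worth of ACKs may be sent without the
 * delayed-ACK timer before the socket falls back to its normal behaviour,
 * unless it re-enters interactive (pingpong) mode first.
 */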
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) /* Send ACKs quickly, if "quick" count is not exhausted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301)  * and the session is not interactive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) static bool tcp_in_quickack_mode(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	const struct dst_entry *dst = __sk_dst_get(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	if (tp->ecn_flags & TCP_ECN_OK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	if (tcp_hdr(skb)->cwr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 		tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 		/* If the sender is telling us it has entered CWR, then its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 		 * cwnd may be very low (even just 1 packet), so we should ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 		 * immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	case INET_ECN_NOT_ECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 		/* Funny extension: if ECT is not set on a segment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 		 * and we have already seen ECT on a previous segment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 		 * it is probably a retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		if (tp->ecn_flags & TCP_ECN_SEEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 			tcp_enter_quickack_mode(sk, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	case INET_ECN_CE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 		if (tcp_ca_needs_ecn(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 			tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 			/* Better not delay acks, sender can have a very low cwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 			tcp_enter_quickack_mode(sk, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		tp->ecn_flags |= TCP_ECN_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 		if (tcp_ca_needs_ecn(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 			tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 		tp->ecn_flags |= TCP_ECN_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 		__tcp_ecn_check_ce(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 		tp->ecn_flags &= ~TCP_ECN_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 		tp->ecn_flags &= ~TCP_ECN_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) }
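/*
 * Editor's sketch, not part of the kernel source: the RFC 3168 negotiation
 * encoded by tcp_ecn_rcv_synack() and tcp_ecn_rcv_syn() above, written out
 * as plain predicates.  ECN stays enabled only for the expected flag
 * combination; anything else clears TCP_ECN_OK.
 */
static inline bool example_ecn_ok_after_synack(bool ece, bool cwr)
{
	return ece && !cwr;	/* valid ECN-setup SYN-ACK: ECE=1, CWR=0 */
}

static inline bool example_ecn_ok_after_syn(bool ece, bool cwr)
{
	return ece && cwr;	/* valid ECN-setup SYN: ECE=1, CWR=1 */
}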
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) /* Buffer size and advertised window tuning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397)  * 1. Tuning sk->sk_sndbuf, when connection enters established state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) static void tcp_sndbuf_expand(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	int sndmem, per_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	u32 nr_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	/* The worst case is non-GSO/TSO: each frame consumes one skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	 * and skb->head is kmalloc'ed from a power-of-two area of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 		  MAX_TCP_HEADER +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 		  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 	per_mss = roundup_pow_of_two(per_mss) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		  SKB_DATA_ALIGN(sizeof(struct sk_buff));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	/* Fast Recovery (RFC 5681 3.2) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	 * Cubic needs 1.7 factor, rounded to 2 to include
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	 * extra cushion (application might react slowly to EPOLLOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	sndmem *= nr_segs * per_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	if (sk->sk_sndbuf < sndmem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 		WRITE_ONCE(sk->sk_sndbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) }
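/*
 * Editor's sketch, not part of the kernel source: a userspace mock of the
 * sizing logic in tcp_sndbuf_expand().  The overhead constants below are
 * illustrative assumptions only (they stand in for MAX_TCP_HEADER, the
 * skb_shared_info tail and struct sk_buff, which are config-dependent).
 */
static unsigned int example_roundup_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

static unsigned int example_sndbuf_target(unsigned int mss, unsigned int snd_cwnd,
					  unsigned int reordering, unsigned int factor)
{
	/* per-segment truesize: payload + header room + shared info, rounded
	 * up to a power of two, plus the skb struct itself
	 */
	unsigned int per_mss = example_roundup_pow_of_two(mss + 320 + 320) + 256;
	unsigned int nr_segs = snd_cwnd > 10 ? snd_cwnd : 10;	/* >= TCP_INIT_CWND */

	if (nr_segs < reordering + 1)
		nr_segs = reordering + 1;
	/* "factor" is ca_ops->sndbuf_expand(sk), 2 by default; the kernel then
	 * clamps the result to sysctl tcp_wmem[2]
	 */
	return factor * nr_segs * per_mss;
}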
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434)  * All of tcp_full_space() is split into two parts: the "network" buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435)  * allocated up front and advertised in the receive window (tp->rcv_wnd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436)  * and the "application" buffer, required to isolate scheduling/application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437)  * latencies from the network.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438)  * window_clamp is the maximal advertised window. It can be less than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439)  * tcp_full_space(); in that case tcp_full_space() - window_clamp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440)  * is reserved for the "application" buffer. The smaller window_clamp is,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441)  * the smoother our behaviour from the network's point of view, but the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442)  * lower the throughput and the higher the connection's sensitivity to losses. 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444)  * rcv_ssthresh is a stricter window_clamp used during the "slow start"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445)  * phase to predict the further behaviour of this connection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446)  * It is used for two goals:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447)  * - to enforce header prediction at the sender, even when the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448)  *   requires a significant "application" buffer. This is check #1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449)  * - to prevent pruning of the receive queue because of misprediction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450)  *   of the receiver window. Check #2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452)  * The scheme does not work when the sender sends good segments opening
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453)  * the window and then starts to feed us spaghetti. But it should work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454)  * in common situations. Otherwise, we have to rely on queue collapsing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) /* Slow part of check#2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 	/* Optimize this! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	while (tp->rcv_ssthresh <= window) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 		if (truesize <= skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 			return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 		truesize >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		window >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	int room;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	/* Check #1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	if (room > 0 && !tcp_under_memory_pressure(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 		int incr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 		/* Check #2. Increase window, if skb with such overhead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 		 * will fit to rcvbuf in future.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 			incr = 2 * tp->advmss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 			incr = __tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 		if (incr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 			incr = max_t(int, incr, 2 * skb->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 			tp->rcv_ssthresh += min(room, incr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 			inet_csk(sk)->icsk_ack.quick |= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) }
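/*
 * Editor's note, not part of the kernel source: check #2 above in numbers.
 * Assume tcp_adv_win_scale is at its default of 1, so tcp_win_from_space()
 * keeps about half of truesize.  A well-packed skb (say len = 1448,
 * truesize = 2304) gives 1152 <= 1448, the cheap test passes, and
 * rcv_ssthresh grows by 2 * advmss (bounded by "room").  A bloated skb
 * (e.g. 1448 bytes of payload in a 16 kB truesize) fails the cheap test and
 * goes through __tcp_grow_window(), which only keeps growing the window
 * while that overhead would still fit into tcp_rmem[2].
 */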
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) /* 3. Try to fixup all. It is made immediately after connection enters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503)  *    established state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) static void tcp_init_buffer_space(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	int maxwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		tcp_sndbuf_expand(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	tp->rcvq_space.time = tp->tcp_mstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 	tp->rcvq_space.seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	maxwin = tcp_full_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	if (tp->window_clamp >= maxwin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 		tp->window_clamp = maxwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 		if (tcp_app_win && maxwin > 4 * tp->advmss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 			tp->window_clamp = max(maxwin -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 					       (maxwin >> tcp_app_win),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 					       4 * tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	/* Force reservation of one segment. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 	if (tcp_app_win &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 	    tp->window_clamp > 2 * tp->advmss &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 	    tp->window_clamp + tp->advmss > maxwin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 	tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 				    (u32)TCP_INIT_CWND * tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) }
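/*
 * Editor's note, not part of the kernel source: a worked example assuming
 * common defaults (tcp_rmem[1] = 128 kB, tcp_adv_win_scale = 1,
 * tcp_app_win = 31, advmss = 1460).  tcp_full_space() is then about 64 kB;
 * maxwin >> 31 is 0, so the first branch leaves window_clamp = maxwin, and
 * the "one segment" rule reserves a single MSS:
 *
 *	window_clamp     = 65536 - 1460 = 64076
 *	rcvq_space.space = min(rcv_ssthresh, rcv_wnd, 10 * 1460 = 14600)
 */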
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) /* 4. Recalculate window clamp after socket hit its memory bounds. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) static void tcp_clamp_window(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	icsk->icsk_ack.quick = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 	    !tcp_under_memory_pressure(sk) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 		WRITE_ONCE(sk->sk_rcvbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 			   min(atomic_read(&sk->sk_rmem_alloc),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 			       net->ipv4.sysctl_tcp_rmem[2]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) /* Initialize the RCV_MSS value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563)  * RCV_MSS is our guess about the MSS used by the peer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564)  * We have no direct information about the MSS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565)  * It is better to underestimate RCV_MSS than to overestimate it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566)  * Overestimation makes us ACK less frequently than needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567)  * Underestimation is easier to detect and fix by tcp_measure_rcv_mss().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) void tcp_initialize_rcv_mss(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	hint = min(hint, tp->rcv_wnd / 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	hint = min(hint, TCP_MSS_DEFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	hint = max(hint, TCP_MIN_MSS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	inet_csk(sk)->icsk_ack.rcv_mss = hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) EXPORT_SYMBOL(tcp_initialize_rcv_mss);
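/*
 * Editor's note, not part of the kernel source: with an assumed
 * advmss = 1460, mss_cache = 1460 and rcv_wnd = 65535, the hint above is
 * min(1460, 1460, 32767) = 1460, then capped at TCP_MSS_DEFAULT (536) and
 * floored at TCP_MIN_MSS, so the initial rcv_mss guess is a conservative
 * 536 bytes until tcp_measure_rcv_mss() observes real segments.
 */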
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) /* Receiver "autotuning" code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584)  * The algorithm for RTT estimation w/o timestamps is based on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585)  * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586)  * <https://public.lanl.gov/radiant/pubs.html#DRS>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588)  * More detail on this code can be found at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589)  * <http://staff.psc.edu/jheffner/>,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590)  * though this reference is out of date.  A new paper
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591)  * is pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	u32 new_sample = tp->rcv_rtt_est.rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 	long m = sample;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 	if (new_sample != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 		/* If we used larger samples in the non-timestamp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 		 * case, we could grossly overestimate the RTT, especially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 		 * with chatty applications or bulk-transfer apps that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		 * stall on filesystem I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 		 * Also, since we only track a minimum in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 		 * non-timestamp case, we do not smooth things out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 		 * otherwise, with timestamps disabled, convergence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 		 * would take too long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 		if (!win_dep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 			m -= (new_sample >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 			new_sample += m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 			m <<= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 			if (m < new_sample)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 				new_sample = m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 		/* No previous measure. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 		new_sample = m << 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	tp->rcv_rtt_est.rtt_us = new_sample;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) }
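/*
 * Editor's sketch, not part of the kernel source: the filter above in plain
 * userspace C.  rtt_us is kept scaled by 8, so the !win_dep branch is the
 * classic EWMA srtt <- 7/8 * srtt + 1/8 * sample, and the win_dep branch is
 * a running minimum (used for the per-window measurement, which may only
 * shrink the estimate).
 */
static unsigned int example_rcv_rtt_update(unsigned int rtt_x8, long sample_us,
					   int win_dep)
{
	long m = sample_us;

	if (!rtt_x8)
		return (unsigned int)(m << 3);		/* first measurement */
	if (!win_dep) {
		m -= rtt_x8 >> 3;			/* error term */
		return (unsigned int)(rtt_x8 + m);	/* 7/8 old + 1/8 new, scaled by 8 */
	}
	m <<= 3;
	return m < (long)rtt_x8 ? (unsigned int)m : rtt_x8;
}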
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	u32 delta_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	if (tp->rcv_rtt_est.time == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 		goto new_measure;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 	if (!delta_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 		delta_us = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	tcp_rcv_rtt_update(tp, delta_us, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) new_measure:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	tp->rcv_rtt_est.time = tp->tcp_mstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 					  const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	if (TCP_SKB_CB(skb)->end_seq -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 		u32 delta_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 			if (!delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 				delta = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 			tcp_rcv_rtt_update(tp, delta_us, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) }
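/*
 * Editor's note, not part of the kernel source: TCP_TS_HZ is 1000 in this
 * tree, so one timestamp tick is 1 ms.  If the echoed timestamp (tsecr) is,
 * say, 3 ticks old, delta_us = 3 * (USEC_PER_SEC / TCP_TS_HZ) = 3000 us,
 * which is then fed into the non-win_dep (smoothed) branch of
 * tcp_rcv_rtt_update().
 */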
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667)  * This function should be called every time data is copied to user space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668)  * It calculates the appropriate TCP receive buffer space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) void tcp_rcv_space_adjust(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	u32 copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 	int time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	trace_tcp_rcv_space_adjust(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 	tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 	/* Number of bytes copied to user in last RTT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 	copied = tp->copied_seq - tp->rcvq_space.seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	if (copied <= tp->rcvq_space.space)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		goto new_measure;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	/* A bit of theory :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	 * copied = bytes received in previous RTT, our base window
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	 * To cope with packet losses, we need a 2x factor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	 * To cope with slow start, and sender growing its cwin by 100 %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	 * every RTT, we need a 4x factor, because the ACK we are sending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	 * now is for the next RTT, not the current one :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 	 * <prev RTT . ><current RTT .. ><next RTT .... >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 		int rcvmem, rcvbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 		u64 rcvwin, grow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 		/* minimal window to cope with packet losses, assuming
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 		 * steady state. Add some cushion because of small variations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 		/* Accommodate for sender rate increase (eg. slow start) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 		grow = rcvwin * (copied - tp->rcvq_space.space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		do_div(grow, tp->rcvq_space.space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 		rcvwin += (grow << 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 			rcvmem += 128;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 		do_div(rcvwin, tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 		rcvbuf = min_t(u64, rcvwin * rcvmem,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 		if (rcvbuf > sk->sk_rcvbuf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 			/* Make the window clamp follow along.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 			tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	tp->rcvq_space.space = copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) new_measure:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	tp->rcvq_space.seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	tp->rcvq_space.time = tp->tcp_mstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) /* There is something which you must keep in mind when you analyze the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734)  * behavior of the tp->ato delayed ack timeout interval.  When a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735)  * connection starts up, we want to ack as quickly as possible.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736)  * problem is that "good" TCPs do slow start at the beginning of data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737)  * transmission.  This means that until we send the first few ACKs the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738)  * sender will sit on his end and only queue most of his data, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739)  * he can only send snd_cwnd unacked packets at any given time.  For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740)  * each ACK we send, he increments snd_cwnd and transmits more of his
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741)  * queue.  -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742)  */
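/* A worked example of the ato smoothing below (values are illustrative
 * assumptions; TCP_ATO_MIN corresponds to ~40 ms, and with HZ=1000 one
 * jiffy is 1 ms): with ato = 40 ms, an inter-arrival gap of 10 ms
 * (<= TCP_ATO_MIN / 2) keeps ato at 40/2 + 20 = 40 ms, while from
 * ato = 120 ms the same gap gives 120/2 + 20 = 80 ms.  A 60 ms gap
 * (still below ato = 120 ms) gives 120/2 + 60 = 120 ms, clamped to
 * icsk_rto; a gap larger than icsk_rto re-enters quick-ACK mode instead.
 */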
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	u32 now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	tcp_measure_rcv_mss(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	tcp_rcv_rtt_measure(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	now = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	if (!icsk->icsk_ack.ato) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		/* The _first_ data packet received, initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 		 * delayed ACK engine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 		tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 		icsk->icsk_ack.ato = TCP_ATO_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		int m = now - icsk->icsk_ack.lrcvtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		if (m <= TCP_ATO_MIN / 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 			/* The fastest case is the first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		} else if (m < icsk->icsk_ack.ato) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 			icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 			if (icsk->icsk_ack.ato > icsk->icsk_rto)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 				icsk->icsk_ack.ato = icsk->icsk_rto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 		} else if (m > icsk->icsk_rto) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 			/* The gap was too long. The sender apparently failed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 			 * restart its window, so send ACKs quickly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 			tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 			sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	icsk->icsk_ack.lrcvtime = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 	tcp_ecn_check_ce(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	if (skb->len >= 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) /* Called to compute a smoothed rtt estimate. The data fed to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790)  * routine either comes from timestamps, or from segments that were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791)  * known _not_ to have been retransmitted [see Karn/Partridge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792)  * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793)  * piece by Van Jacobson.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794)  * NOTE: the next three routines used to be one big routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795)  * To save cycles in the RFC 1323 implementation it was better to break
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796)  * it up into three procedures. -- erics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	long m = mrtt_us; /* RTT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	u32 srtt = tp->srtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	/*	The following amusing code comes from Jacobson's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	 *	article in SIGCOMM '88.  Note that rtt and mdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	 *	are scaled versions of rtt and mean deviation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	 *	This is designed to be as fast as possible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	 *	m stands for "measurement".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	 *	In a 1990 paper the RTO value was changed to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	 *	RTO = rtt + 4 * mdev
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	 * Funny. This algorithm seems to be very broken.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	 * These formulae increase RTO when it should be decreased, increase it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	 * too slowly when it should be increased quickly, and decrease it too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	 * quickly, etc. I guess in BSD RTO takes ONE value, so that it absolutely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	 * does not matter how it is _calculated_. It seems this was a trap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	 * that VJ failed to avoid. 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	 */
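	/* A worked example (microsecond values are illustrative assumptions):
	 * srtt_us stores 8 * srtt and mdev_us stores 4 * mdev.  With
	 * srtt_us = 800000 (srtt = 100 ms) and a new measurement
	 * m = 120000 us, the error is m - (srtt_us >> 3) = 20000, so
	 * srtt_us becomes 820000, i.e. srtt = 7/8 * 100 ms + 1/8 * 120 ms
	 * = 102.5 ms.  Likewise mdev_us += |err| - (mdev_us >> 2) implements
	 * mdev = 3/4 * mdev + 1/4 * |err|.
	 */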
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	if (srtt != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 		m -= (srtt >> 3);	/* m is now error in rtt est */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		if (m < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 			m = -m;		/* m is now abs(error) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 			/* This is similar to one of Eifel findings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 			 * Eifel blocks mdev updates when rtt decreases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 			 * This solution is a bit different: we use finer gain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 			 * for mdev in this case (alpha*beta).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 			 * Like Eifel it also prevents growth of rto,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 			 * but also it limits too fast rto decreases,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 			 * happening in pure Eifel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 			if (m > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 				m >>= 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		if (tp->mdev_us > tp->mdev_max_us) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 			tp->mdev_max_us = tp->mdev_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 			if (tp->mdev_max_us > tp->rttvar_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 				tp->rttvar_us = tp->mdev_max_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		if (after(tp->snd_una, tp->rtt_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 			if (tp->mdev_max_us < tp->rttvar_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 			tp->rtt_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 			tp->mdev_max_us = tcp_rto_min_us(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			tcp_bpf_rtt(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 		/* no previous measure. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 		srtt = m << 3;		/* take the measured time to be rtt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		tp->mdev_max_us = tp->rttvar_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		tp->rtt_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		tcp_bpf_rtt(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	tp->srtt_us = max(1U, srtt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) static void tcp_update_pacing_rate(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	u64 rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	/* current rate is (cwnd * mss) / srtt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	 * In Slow Start [1], set sk_pacing_rate to 200 % of the current rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	 * In the Congestion Avoidance phase, set it to 120 % of the current rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	 * [1] : The normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	 *	 end of slow start and should slow down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	 */
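	/* A worked example (values are illustrative assumptions): with
	 * mss_cache = 1448, snd_cwnd = 10 and srtt = 100 ms (srtt_us =
	 * 800000, i.e. 8 * srtt), the current rate is 10 * 1448 B / 0.1 s
	 * ~= 145 KB/s.  With sysctl_tcp_pacing_ss_ratio = 200 the pacing
	 * rate below works out to ~290 KB/s, and with
	 * sysctl_tcp_pacing_ca_ratio = 120 to ~174 KB/s.
	 */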
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	rate *= max(tp->snd_cwnd, tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	if (likely(tp->srtt_us))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		do_div(rate, tp->srtt_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	/* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	 * without any lock. We want to make sure the compiler won't store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	 * intermediate values in this location.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 					     sk->sk_max_pacing_rate));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) /* Calculate rto without backoff.  This is the second half of Van Jacobson's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901)  * routine referred to above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902)  */
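/* In terms of the classic formula this is RTO = srtt + 4 * rttvar; with the
 * scaled fields kept above (srtt_us = 8 * srtt, rttvar_us = 4 * rttvar)
 * __tcp_set_rto() effectively evaluates (srtt_us >> 3) + rttvar_us in
 * microseconds, and tcp_bound_rto() then caps the result at TCP_RTO_MAX.
 */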
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) static void tcp_set_rto(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	/* Old crap is replaced with new one. 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	 * More seriously:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	 * 1. If the rtt variance happened to be less than 50 msec, it is a hallucination.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	 *    It cannot be less, due to the utterly erratic ACK generation made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	 *    at least by solaris and freebsd. "Erratic ACKs" have _nothing_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	 *    to do with delayed acks, because at cwnd>2 true delack timeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	 *    is invisible. Actually, Linux-2.4 also generates erratic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	 *    ACKs in some circumstances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	/* 2. Fixups made earlier cannot be right.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	 *    If we do not estimate RTO correctly without them,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	 *    the whole algorithm is pure shit and should be replaced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	 *    with a correct one. That is exactly what we pretend to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	 * guarantees that rto is higher.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	tcp_bound_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	if (!cwnd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		cwnd = TCP_INIT_CWND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) struct tcp_sacktag_state {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	/* Timestamps for earliest and latest never-retransmitted segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	 * but congestion control should still get an accurate delay signal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	u64	first_sackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	u64	last_sackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	u32	reord;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	u32	sack_delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	int	flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	unsigned int mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	struct rate_sample *rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) /* Take note that the peer is sending D-SACKs. Skip updating data delivery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954)  * and spurious retransmission information if this DSACK is unlikely to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955)  * been caused by the sender's actions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956)  * - The DSACKed sequence range is larger than the maximum receiver window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957)  * - The total no. of DSACKed segments exceeds the total no. of retransmitted segs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958)  */
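/* A worked example (values are illustrative assumptions): with
 * mss_cache = 1448, a D-SACK covering 4000 bytes is counted below as
 * DIV_ROUND_UP(4000, 1448) = 3 duplicate segments, whereas a range wider
 * than tp->max_window, or more cumulative D-SACKed segments than were ever
 * retransmitted, is treated as dubious and returns 0.
 */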
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			  u32 end_seq, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	u32 seq_len, dup_segs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	if (!before(start_seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	seq_len = end_seq - start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	/* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	if (seq_len > tp->max_window)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	if (seq_len > tp->mss_cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 		dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	tp->dsack_dups += dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	/* Skip the DSACK if dup segs weren't retransmitted by sender */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	if (tp->dsack_dups > tp->total_retrans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	tp->rack.dsack_seen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	state->flag |= FLAG_DSACKING_ACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	/* A spurious retransmission is delivered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	state->sack_delivered += dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	return dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) /* It's reordering when a higher sequence was delivered (i.e. SACKed) before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990)  * some lower, never-retransmitted sequence ("low_seq"). The maximum reordering
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991)  * distance is approximated as a distance in full-MSS packets ("reordering").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992)  */
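/* A worked example (values are illustrative assumptions): if the highest
 * SACKed sequence is 50000 and a never-retransmitted segment starting at
 * 44210 is delivered only afterwards, metric = 5790; with mss = 1448 and
 * metric exceeding the current reordering distance, tp->reordering becomes
 * (5790 + 1448 - 1) / 1448 = 4 packets, capped by
 * sysctl_tcp_max_reordering.
 */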
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 				      const int ts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	const u32 mss = tp->mss_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	u32 fack, metric;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	fack = tcp_highest_sack_seq(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	if (!before(low_seq, fack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	metric = fack - low_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	if ((metric > tp->reordering * mss) && mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) #if FASTRETRANS_DEBUG > 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 			 tp->reordering,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 			 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			 tp->sacked_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 			 tp->undo_marker ? tp->undo_retrans : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		tp->reordering = min_t(u32, (metric + mss - 1) / mss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	/* This exciting event is worth remembering. 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	tp->reord_seen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* This must be called before lost_out or retrans_out are updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)  * on a new loss, because we want to know if all skbs previously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)  * known to be lost have already been retransmitted, indicating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)  * that this newly lost skb is our next skb to retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	    (tp->retransmit_skb_hint &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	     before(TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		    TCP_SKB_CB(tp->retransmit_skb_hint)->seq)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 		tp->retransmit_skb_hint = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) /* Sum the number of packets on the wire we have marked as lost, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)  * notify the congestion control module that the given skb was marked lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	tp->lost += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	__u8 sacked = TCP_SKB_CB(skb)->sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	if (sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	tcp_verify_retransmit_hint(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		if (sacked & TCPCB_SACKED_RETRANS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 			/* Account for retransmits that are lost again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 			tp->retrans_out -= tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 				      tcp_skb_pcount(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 			tcp_notify_skb_loss_event(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 		tp->lost_out += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		tcp_notify_skb_loss_event(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) /* Updates the delivered and delivered_ce counts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 				bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	tp->delivered += delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	if (ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		tp->delivered_ce += delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) /* This procedure tags the retransmission queue when SACKs arrive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)  * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)  * Packets in queue with these bits set are counted in variables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)  * sacked_out, retrans_out and lost_out, correspondingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)  * Valid combinations are:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)  * Tag  InFlight	Description
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)  * 0	1		- orig segment is in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)  * S	0		- nothing flies, orig reached receiver.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)  * L	0		- nothing flies, orig lost by net.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)  * R	2		- both orig and retransmit are in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)  * L|R	1		- orig is lost, retransmit is in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)  * S|R  1		- orig reached receiver, retrans is still in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)  * (L|S|R is logically valid, it could occur when L|R is sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)  *  but it is equivalent to plain S and the code short-circuits it to S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)  *  L|S is logically invalid, it would mean -1 packet in flight 8))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)  * These 6 states form a finite state machine, controlled by the following events:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)  * 3. Loss detection event of two flavors:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)  *	A. Scoreboard estimator decided the packet is lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)  *	   A'. Reno "three dupacks" marks head of queue lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)  *	B. SACK arrives sacking SND.NXT at the moment, when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)  *	   segment was retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)  * 4. D-SACK added new rule: D-SACK changes any tag to S.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)  * It is pleasant to note that the state diagram turns out to be commutative,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)  * so we are allowed not to be bothered by the order of our actions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)  * when multiple events arrive simultaneously (see the function below).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)  * Reordering detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)  * --------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)  * The reordering metric is the maximal distance by which a packet can be displaced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)  * in the packet stream. With SACKs we can estimate it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)  * 1. A SACK fills an old hole and the corresponding segment was not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)  *    ever retransmitted -> reordering. Alas, we cannot use it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)  *    when the segment was retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120)  * 2. The last flaw is solved with D-SACK. A D-SACK arrives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)  *    for a retransmitted and already SACKed segment -> reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)  * Both of these heuristics are not used in Loss state, when we cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)  * account for retransmits accurately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)  * SACK block validation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)  * ----------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)  * SACK block range validation checks that the received SACK block fits to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)  * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)  * Note that SND.UNA is not included in the range even though it is valid, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)  * it means that the receiver is rather inconsistent with itself, reporting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)  * SACK reneging when it should advance SND.UNA. Such a SACK block is,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)  * however, perfectly valid in light of RFC 2018, which explicitly states
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)  * that "SACK block MUST reflect the newest segment.  Even if the newest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)  * segment is going to be discarded ...", not that it looks very clever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)  * in the case of the head skb. Due to potential receiver-driven attacks, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)  * choose to avoid immediately executing a walk in the write queue due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)  * reneging and defer the head skb's loss recovery to the standard loss recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)  * procedure that will eventually trigger (nothing forbids us doing this).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)  * This also blocks start_seq wrap-around. The problem lies in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)  * fact that though start_seq (s) is before end_seq (i.e., not reversed),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)  * there's no guarantee that it will be before snd_nxt (n). The problem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)  * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)  * wrap (s_w):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)  *         <- outs wnd ->                          <- wrapzone ->
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)  *         u     e      n                         u_w   e_w  s n_w
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)  *         |     |      |                          |     |   |  |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)  * |<------------+------+----- TCP seqno space --------------+---------->|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)  * ...-- <2^31 ->|                                           |<--------...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)  * ...---- >2^31 ------>|                                    |<--------...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)  * The current code wouldn't be vulnerable, but it's better still to discard such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)  * crazy SACK blocks. Doing this check for start_seq alone closes the somewhat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)  * similar case (end_seq after snd_nxt wrap), as the earlier reversed check in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)  * snd_nxt wrap -> snd_una region will then become "well defined", i.e.,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  * equal to the ideal case (infinite seqno space without wrap-caused issues).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)  * With D-SACK the lower bound is extended to cover sequence space below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)  * SND.UNA down to undo_marker, which is the last point of interest. Yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)  * again, a D-SACK block must not go across snd_una (for the same reason as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)  * for the normal SACK blocks, explained above). But there all simplicity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)  * ends: TCP might receive valid D-SACKs below that. As long as they reside
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)  * fully below undo_marker they do not affect behavior in any way and can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)  * therefore be safely ignored. In rare cases (which are more or less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)  * theoretical ones), the D-SACK will nicely cross that boundary due to skb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)  * fragmentation and packet reordering past skb's retransmission. To consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)  * them correctly, the acceptable range must be extended even more though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)  * the exact amount is rather hard to quantify. However, tp->max_window can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)  * be used as an exaggerated estimate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)  */
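/* A worked example of the D-SACK lower bound (sequence numbers are
 * illustrative assumptions): with snd_una = 100000 and undo_marker = 90000,
 * a D-SACK for [92000, 93000) is accepted below (entirely below snd_una but
 * starting at or above undo_marker), whereas a D-SACK for [50000, 51000)
 * fails the "too old" test because its end_seq does not reach undo_marker.
 */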
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 				   u32 start_seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	/* Too far in future, or reversed (interpretation is ambiguous) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	/* Nasty start_seq wrap-around check (see comments above) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	if (!before(start_seq, tp->snd_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	/* In the outstanding window? ...This is a valid exit for D-SACKs too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	 * start_seq == snd_una is nonsensical (see comments above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	if (after(start_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	if (!is_dsack || !tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	/* ...Then it's D-SACK, and must reside below snd_una completely */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	if (after(end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	if (!before(start_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	/* Too old */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	if (!after(end_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	/* Undo_marker boundary crossing (overestimates a lot). Known already:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	 *   start_seq < undo_marker and end_seq >= undo_marker.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	return !before(start_seq, end_seq - tp->max_window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 			    struct tcp_sack_block_wire *sp, int num_sacks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 			    u32 prior_snd_una, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	u32 dup_segs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	} else if (num_sacks > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	if (!dup_segs) {	/* Skip dubious DSACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	/* D-SACK for already forgotten data... Do dumb counting. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	if (tp->undo_marker && tp->undo_retrans > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	    !after(end_seq_0, prior_snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	    after(end_seq_0, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 		tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) /* Check if skb is fully within the SACK block. In the presence of GSO skbs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)  * the incoming SACK may not match exactly, but we can find a smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)  * MSS-aligned portion of it that matches. Therefore we might need to fragment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)  * which may fail and create some hassle (the caller must handle error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)  * returns).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)  * FIXME: this could be merged to shift decision code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256)  */
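/* A worked example (values are illustrative assumptions): a GSO skb covering
 * [10000, 20000) with mss = 1448 that is hit by a SACK block starting at
 * 13000 yields pkt_len = 3000, which is rounded up to the next MSS boundary
 * (3 * 1448 = 4344) so that the leading, un-SACKed part keeps only
 * full-sized segments; tcp_fragment() then splits the skb at that point.
 */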
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 				  u32 start_seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	bool in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	unsigned int pkt_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	unsigned int mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	if (tcp_skb_pcount(skb) > 1 && !in_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		mss = tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		if (!in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 			pkt_len = start_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 			if (pkt_len < mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 				pkt_len = mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 			pkt_len = end_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 			if (pkt_len < mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 				return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		/* Round if necessary so that SACKs cover only full MSSes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		 * and/or the remaining small portion (if present)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		if (pkt_len > mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 			unsigned int new_len = (pkt_len / mss) * mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 			if (!in_sack && new_len < pkt_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 				new_len += mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 			pkt_len = new_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		if (pkt_len >= skb->len && !in_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 				   pkt_len, mss, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 			return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	return in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) /* Mark the given newly-SACKed range as such, adjusting counters and hints. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) static u8 tcp_sacktag_one(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 			  struct tcp_sacktag_state *state, u8 sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 			  u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 			  int dup_sack, int pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 			  u64 xmit_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	/* Account D-SACK for retransmitted packet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 		if (tp->undo_marker && tp->undo_retrans > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 		    after(end_seq, tp->undo_marker))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 			tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		if ((sacked & TCPCB_SACKED_ACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		    before(start_seq, state->reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 				state->reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	if (!after(end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		return sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	if (!(sacked & TCPCB_SACKED_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 		tcp_rack_advance(tp, sacked, end_seq, xmit_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		if (sacked & TCPCB_SACKED_RETRANS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 			/* If the segment is not tagged as lost,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 			 * we do not clear RETRANS, believing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 			 * that retransmission is still in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 			if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 				sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 				tp->lost_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 				tp->retrans_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			if (!(sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 				/* New SACK for a frame that was not retransmitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 				 * and was in a hole. This is reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 				if (before(start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 					   tcp_highest_sack_seq(tp)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 				    before(start_seq, state->reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 					state->reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 				if (!after(end_seq, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 					state->flag |= FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 				if (state->first_sackt == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 					state->first_sackt = xmit_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 				state->last_sackt = xmit_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 			if (sacked & TCPCB_LOST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 				sacked &= ~TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 				tp->lost_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 		sacked |= TCPCB_SACKED_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 		state->flag |= FLAG_DATA_SACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 		tp->sacked_out += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 		/* Out-of-order packets delivered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 		state->sack_delivered += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 		if (tp->lost_skb_hint &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			tp->lost_cnt_hint += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	 * frames and clear it. undo_retrans is decreased above, L|R frames
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	 * are accounted above as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		sacked &= ~TCPCB_SACKED_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 		tp->retrans_out -= pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	return sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) /* Shift newly-SACKed bytes from this skb to the immediately previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)  * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)  */
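/* A worked example (values are illustrative assumptions): if prev covers
 * [1000, 2000) and skb covers [2000, 4896) as two 1448-byte GSO segments,
 * shifting the first 1448 newly-SACKed bytes tags [2000, 3448) via
 * tcp_sacktag_one(), moves prev->end_seq and skb->seq to 3448 and transfers
 * one pcount from skb to prev; if skb->len drops to zero the whole skb is
 * merged away and freed.
 */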
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 			    struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 			    struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 			    unsigned int pcount, int shifted, int mss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 			    bool dup_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	BUG_ON(!pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	/* Adjust counters and hints for the newly sacked sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	 * range but discard the return value since prev is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	 * marked. We must tag the range first because the seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	 * advancement below implicitly advances
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	 * tcp_highest_sack_seq() when skb is highest_sack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 			start_seq, end_seq, dup_sack, pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 			tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	tcp_rate_skb_delivered(sk, skb, state->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	if (skb == tp->lost_skb_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 		tp->lost_cnt_hint += pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	TCP_SKB_CB(prev)->end_seq += shifted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	TCP_SKB_CB(skb)->seq += shifted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	tcp_skb_pcount_add(prev, pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	tcp_skb_pcount_add(skb, -pcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	/* When we're adding to gso_segs == 1, gso_size will be zero;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 	 * in theory this shouldn't be necessary, but as long as DSACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 	 * code can come after this skb later on, it's better to keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 	 * setting gso_size to something.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	if (!TCP_SKB_CB(prev)->tcp_gso_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 		TCP_SKB_CB(prev)->tcp_gso_size = mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	if (tcp_skb_pcount(skb) <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		TCP_SKB_CB(skb)->tcp_gso_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	if (skb->len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 		BUG_ON(!tcp_skb_pcount(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	/* Whole SKB was eaten :-) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	if (skb == tp->retransmit_skb_hint)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 		tp->retransmit_skb_hint = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	if (skb == tp->lost_skb_hint) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		tp->lost_skb_hint = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 		TCP_SKB_CB(prev)->end_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	if (skb == tcp_highest_sack(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		tcp_advance_highest_sack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	tcp_skb_collapse_tstamp(prev, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	tcp_rtx_queue_unlink_and_free(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
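
/* Worked example of the shift above (illustrative numbers only): if prev
 * covers [1000, 2000) and skb covers [2000, 4000) with shifted == 1000 and
 * pcount == 1, then after tcp_shifted_skb() prev covers [1000, 3000),
 * skb covers [3000, 4000), one segment of pcount moves from skb to prev,
 * and the function returns false because skb still has bytes left. Only
 * when skb->len drops to zero is skb unlinked and freed and true returned.
 */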
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) /* I wish gso_size had a saner initialization than something-or-zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)  * which complicates things.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) static int tcp_skb_seglen(const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) /* Shifting pages past head area doesn't work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) static int skb_can_shift(const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		  int pcount, int shiftlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	 * to make sure not storing more than 65535 * 8 bytes per skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	 * even if current MSS is bigger.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	return skb_shift(to, from, shiftlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) }
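
/* Concrete bound implied by the checks above: with TCP_MIN_GSO_SIZE == 8
 * bytes and a 16-bit tcp_gso_segs field, a single skb is limited to at
 * most 65535 segments and roughly 65535 * 8 = 524280 bytes, regardless
 * of the current MSS; tcp_skb_shift() refuses shifts that would exceed
 * either limit and returns 0, so the caller falls back to not merging.
 */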
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) /* Try collapsing SACK blocks spanning across multiple skbs to a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)  * skb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 					  struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 					  u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 					  bool dup_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	struct sk_buff *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	int mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	int pcount = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	int in_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	/* Normally R (retransmitted) without L (lost) won't result in plain S (sacked) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	if (!dup_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	if (!skb_can_shift(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	/* This frame is about to be dropped (was ACKed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	/* Can only happen with delayed DSACK + discard craziness */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	prev = skb_rb_prev(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	if (!prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 	if (!tcp_skb_can_collapse(prev, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		  !before(end_seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 	if (in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 		len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 		pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		mss = tcp_skb_seglen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		/* TODO: Fix DSACKs to not fragment already-SACKed data, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 		 * we can drop this restriction as unnecessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		if (mss != tcp_skb_seglen(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 			goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		if (!after(TCP_SKB_CB(skb)->end_seq, start_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 			goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 		/* CHECKME: Is this the non-MSS split case only? Btw, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		 * will cause skipped skbs due to the advancing loop; the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		 * original has that feature too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		if (tcp_skb_pcount(skb) <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 			goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		if (!in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 			/* TODO: a head merge to the next skb could be attempted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 			 * here if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 			 * though it might not be worth the additional hassle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 			 * ...we can probably just fall back to what was done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 			 * previously. We could try merging non-SACKed ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 			 * as well, but it probably isn't going to pay off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 			 * because later SACKs might split them again, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 			 * it would make skb timestamp tracking a considerably
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 			 * harder problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 			goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 		len = end_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		BUG_ON(len < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 		BUG_ON(len > skb->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 		/* MSS boundaries should be honoured or else pcount will severely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		 * break, even though honouring them makes things a bit trickier.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 		 * Optimize the common case to avoid most of the divides.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 		mss = tcp_skb_mss(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 		/* TODO: Fix DSACKs to not fragment already-SACKed data, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 		 * we can drop this restriction as unnecessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 		if (mss != tcp_skb_seglen(prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 			goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 		if (len == mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 			pcount = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 		} else if (len < mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 			goto noop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 			pcount = len / mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 			len = pcount * mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	if (!tcp_skb_shift(prev, skb, pcount, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 		goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	/* A filled hole allows collapsing with the next skb as well; this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	 * very useful when the hole-on-every-nth-skb pattern happens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	skb = skb_rb_next(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	if (!skb_can_shift(skb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	    (mss != tcp_skb_seglen(skb)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 	if (!tcp_skb_can_collapse(prev, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 	if (tcp_skb_shift(prev, skb, pcount, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 		tcp_shifted_skb(sk, prev, skb, state, pcount,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 				len, mss, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) noop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) fallback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 					struct tcp_sack_block *next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 					struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 					u32 start_seq, u32 end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 					bool dup_sack_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 	struct sk_buff *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 		int in_sack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 		bool dup_sack = dup_sack_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		/* queue is in-order => we can short-circuit the walk early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		if (next_dup  &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 		    before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 			in_sack = tcp_match_skb_to_sack(sk, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 							next_dup->start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 							next_dup->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 			if (in_sack > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 				dup_sack = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 		/* The skb reference here is a bit tricky to get right, since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 		 * shifting can eat and free both this skb and the next one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 		 * so not even the _safe variant of the loop is enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		if (in_sack <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 			tmp = tcp_shift_skb_data(sk, skb, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 						 start_seq, end_seq, dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 			if (tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 				if (tmp != skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 					skb = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 				in_sack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 				in_sack = tcp_match_skb_to_sack(sk, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 								start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 								end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		if (unlikely(in_sack < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 		if (in_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 			TCP_SKB_CB(skb)->sacked =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 				tcp_sacktag_one(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 						state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 						TCP_SKB_CB(skb)->sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 						TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 						TCP_SKB_CB(skb)->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 						dup_sack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 						tcp_skb_pcount(skb),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 						tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 			tcp_rate_skb_delivered(sk, skb, state->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 				list_del_init(&skb->tcp_tsorted_anchor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 			if (!before(TCP_SKB_CB(skb)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 				    tcp_highest_sack_seq(tp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 				tcp_advance_highest_sack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 		parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		skb = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 		if (before(seq, TCP_SKB_CB(skb)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 			p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 			p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 		return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) }
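
/* For example, with rtx-queue skbs covering [1000, 2000) and [2000, 3000),
 * tcp_sacktag_bsearch(sk, 2500) descends the rb-tree and returns the skb
 * covering [2000, 3000), because 2500 is neither before its seq nor at or
 * beyond its end_seq; a seq that falls within no skb (e.g. past the last
 * end_seq) returns NULL.
 */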
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 					u32 skip_to_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 		return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 	return tcp_sacktag_bsearch(sk, skip_to_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 						struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 						struct tcp_sack_block *next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 						struct tcp_sacktag_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 						u32 skip_to_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	if (!next_dup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 		return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	if (before(next_dup->start_seq, skip_to_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		skb = tcp_sacktag_walk(skb, sk, NULL, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 				       next_dup->start_seq, next_dup->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 				       1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 			u32 prior_snd_una, struct tcp_sacktag_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 				    TCP_SKB_CB(ack_skb)->sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	struct tcp_sack_block sp[TCP_NUM_SACKS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 	struct tcp_sack_block *cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	int used_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	bool found_dup_sack = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	int first_sack_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	state->flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 	state->reord = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	if (!tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 		tcp_highest_sack_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 					 num_sacks, prior_snd_una, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	/* Eliminate too-old ACKs, but take into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 	 * account more or less fresh ones; they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 	 * contain valid SACK info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	if (before(TCP_SKB_CB(ack_skb)->ack_seq, prior_snd_una - tp->max_window))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	if (!tp->packets_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 	used_sacks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	first_sack_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	for (i = 0; i < num_sacks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 		bool dup_sack = !i && found_dup_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 		if (!tcp_is_sackblock_valid(tp, dup_sack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 					    sp[used_sacks].start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 					    sp[used_sacks].end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 			int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 			if (dup_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 				if (!tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 					mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 				else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 					mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 				/* Don't count old SACK blocks caused by ACK reordering */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 				if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 				    !after(sp[used_sacks].end_seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 				mib_idx = LINUX_MIB_TCPSACKDISCARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 			NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 			if (i == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 				first_sack_index = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 		/* Ignore very old stuff early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 		if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 			if (i == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 				first_sack_index = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 		used_sacks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	/* Order SACK blocks to allow an in-order walk of the retrans queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	for (i = used_sacks - 1; i > 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 		for (j = 0; j < i; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 				swap(sp[j], sp[j + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 				/* Track where the first SACK block goes to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 				if (j == first_sack_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 					first_sack_index = j + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	state->mss_now = tcp_current_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	if (!tp->sacked_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 		/* It's already past, so skip checking against it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 		cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 		cache = tp->recv_sack_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 		/* Skip empty blocks at the head of the cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		       !cache->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 			cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 	while (i < used_sacks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		u32 start_seq = sp[i].start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 		u32 end_seq = sp[i].end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 		bool dup_sack = (found_dup_sack && (i == first_sack_index));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		struct tcp_sack_block *next_dup = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 		if (found_dup_sack && ((i + 1) == first_sack_index))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 			next_dup = &sp[i + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 		/* Skip too early cached blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 		while (tcp_sack_cache_ok(tp, cache) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		       !before(start_seq, cache->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 			cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 		/* Can we skip some work by looking at recv_sack_cache? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 		if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		    after(end_seq, cache->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 			/* Head todo? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 			if (before(start_seq, cache->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 				skb = tcp_sacktag_skip(skb, sk, start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 				skb = tcp_sacktag_walk(skb, sk, next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 						       state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 						       start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 						       cache->start_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 						       dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 			/* Rest of the block already fully processed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 			if (!after(end_seq, cache->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 				goto advance_sp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 			skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 						       state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 						       cache->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 			/* ...tail remains todo... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 				/* ...but better entrypoint exists! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 				skb = tcp_highest_sack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 				if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 				cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 				goto walk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 			skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 			/* Check overlap against next cached too (past this one already) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 			cache++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 			skb = tcp_highest_sack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 			if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		skb = tcp_sacktag_skip(skb, sk, start_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) walk:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 		skb = tcp_sacktag_walk(skb, sk, next_dup, state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 				       start_seq, end_seq, dup_sack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) advance_sp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 		i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	/* Clear the head of the cache sack blocks so we can skip it next time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 	for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 		tp->recv_sack_cache[i].start_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 		tp->recv_sack_cache[i].end_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	for (j = 0; j < used_sacks; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		tp->recv_sack_cache[i++] = sp[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 		tcp_check_sack_reordering(sk, state->reord, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) #if FASTRETRANS_DEBUG > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	WARN_ON((int)tp->sacked_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	WARN_ON((int)tp->lost_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 	WARN_ON((int)tp->retrans_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	return state->flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) /* Limits sacked_out so that its sum with lost_out is never larger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)  * packets_out. Returns false if no sacked_out adjustment was necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	u32 holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	holes = max(tp->lost_out, 1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	holes = min(holes, tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	if ((tp->sacked_out + holes) > tp->packets_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 		tp->sacked_out = tp->packets_out - holes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
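
/* Example of the clamp above (illustrative numbers): with packets_out == 10,
 * lost_out == 3 and sacked_out == 9, holes becomes min(max(3, 1), 10) == 3,
 * 9 + 3 > 10, so sacked_out is clamped to 10 - 3 == 7 and the function
 * returns true; the caller then treats the excess dupacks as reordering.
 */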
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /* If we receive more dupacks than we expected when counting segments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)  * under the assumption of no reordering, interpret this as reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)  * The only other reason could be a bug in the receiver's TCP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) static void tcp_check_reno_reordering(struct sock *sk, const int addend)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	if (!tcp_limit_reno_sacked(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 	tp->reordering = min_t(u32, tp->packets_out + addend,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	tp->reord_seen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) /* Emulate SACKs for SACKless connection: account for a new dupack. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 	if (num_dupack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 		struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 		u32 prior_sacked = tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 		s32 delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		tp->sacked_out += num_dupack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 		tcp_check_reno_reordering(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 		delivered = tp->sacked_out - prior_sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 		if (delivered > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 			tcp_count_delivered(tp, delivered, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 		tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
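
/* Illustrative example: for a SACKless flow with packets_out == 10,
 * sacked_out == 4 and num_dupack == 2, sacked_out first grows to 6; if the
 * reno-reordering clamp above leaves it at 6, delivered == 6 - 4 == 2 and
 * two newly delivered segments are accounted via tcp_count_delivered().
 */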
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) /* Account for ACK, ACKing some data in Reno Recovery phase. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	if (acked > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 		/* One ACK acked hole. The rest eat duplicate ACKs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 		tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 				    ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 		if (acked - 1 >= tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 			tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 			tp->sacked_out -= acked - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 	tcp_check_reno_reordering(sk, acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) }
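
/* Illustrative example: if a cumulative ACK newly acks acked == 3 segments
 * while sacked_out == 5, then max(3 - 5, 1) == 1 segment is counted as
 * delivered (the ACKed hole), and sacked_out drops by acked - 1 == 2 to 3,
 * since the remaining two acked segments were already accounted as
 * emulated SACKs by earlier duplicate ACKs.
 */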
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) void tcp_clear_retrans(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	tp->retrans_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 	tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	tp->undo_retrans = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) static inline void tcp_init_undo(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	tp->undo_marker = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	/* Retransmissions still in flight may cause DSACKs later. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	tp->undo_retrans = tp->retrans_out ? : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) static bool tcp_is_rack(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) /* If we detect SACK reneging, forget all SACK information
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063)  * and reset the tags completely; otherwise preserve SACKs. If the receiver
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)  * dropped its ofo queue, we will know this via reneging detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) static void tcp_timeout_mark_lost(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	struct sk_buff *skb, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	bool is_reneg;			/* is receiver reneging on SACKs? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	head = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	is_reneg = head && (TCP_SKB_CB(head)->sacked & TCPCB_SACKED_ACKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	if (is_reneg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 		tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 		/* Mark SACK reneging until we recover from this loss event. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 		tp->is_sack_reneg = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	} else if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 		tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 	skb = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 	skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 		if (is_reneg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 		else if (tcp_is_rack(sk) && skb != head &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 			 tcp_rack_skb_timeout(tp, skb, 0) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 			continue; /* Don't mark recently sent ones lost yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 		tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 	tcp_clear_all_retrans_hints(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) /* Enter Loss state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) void tcp_enter_loss(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 	struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	tcp_timeout_mark_lost(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	/* Reduce ssthresh if the reduction has not yet been made inside this window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	    !after(tp->high_seq, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 		tp->prior_cwnd = tp->snd_cwnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 		tcp_ca_event(sk, CA_EVENT_LOSS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 		tcp_init_undo(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 	tp->snd_cwnd	   = tcp_packets_in_flight(tp) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 	tp->snd_cwnd_cnt   = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	/* A timeout in the disordered state after receiving substantial DUPACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	 * suggests that the degree of reordering is over-estimated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	    tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 		tp->reordering = min_t(unsigned int, tp->reordering,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 				       net->ipv4.sysctl_tcp_reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 	tcp_set_ca_state(sk, TCP_CA_Loss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	tcp_ecn_queue_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	 * loss recovery is underway except recurring timeout(s) on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	 * the same SND.UNA (sec 3.2). Disable F-RTO during path MTU probing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 	tp->frto = net->ipv4.sysctl_tcp_frto &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 		   (new_recovery || icsk->icsk_retransmits) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 		   !inet_csk(sk)->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) }
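
/* Note on the cwnd setting above: snd_cwnd = tcp_packets_in_flight() + 1
 * leaves room for just one more segment beyond what is still counted as
 * in flight, so immediately after the timeout only the head retransmission
 * (cf. the F-RTO comment above) can go out before further ACKs arrive.
 */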
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) /* If an ACK arrived pointing to a remembered SACK, it means that our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141)  * remembered SACKs do not reflect the real state of the receiver, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142)  * the receiver _host_ is heavily congested (or buggy).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)  * To avoid big spurious retransmission bursts due to transient SACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)  * scoreboard oddities that look like reneging, we give the receiver a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146)  * little time (max(RTT/2, 10ms)) to send us some more ACKs that will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147)  * restore sanity to the SACK scoreboard. If the apparent reneging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148)  * persists until this RTO then we'll clear the SACK scoreboard.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) static bool tcp_check_sack_reneging(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	if (flag & FLAG_SACK_RENEGING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 		struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 		unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 					  msecs_to_jiffies(10));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 					  delay, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
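
/* The delay computed above implements the max(RTT/2, 10ms) grace period
 * described before tcp_check_sack_reneging(): tp->srtt_us holds the
 * smoothed RTT left-shifted by 3 (i.e. 8 * SRTT in usecs), so
 * srtt_us >> 4 is SRTT/2, and msecs_to_jiffies(10) provides the 10ms
 * floor via max().
 */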
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) /* Heuristics to calculate the number of duplicate ACKs. There's no dupACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165)  * counter when SACK is enabled (without SACK, sacked_out is used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)  * that purpose).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168)  * With reordering, holes may still be in flight, so RFC3517 recovery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)  * uses pure sacked_out (the total number of SACKed segments) even though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)  * this violates the RFC, which uses duplicate ACKs. Often these are equal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171)  * but when e.g. out-of-window ACKs or packet duplication occurs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172)  * they differ. Since neither occurs due to loss, TCP should really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)  * ignore them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	return tp->sacked_out + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) /* Linux NewReno/SACK/ECN state machine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)  * --------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183)  * "Open"	Normal state, no dubious events, fast path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184)  * "Disorder"   In all respects it is "Open",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)  *		but requires a bit more attention. It is entered when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)  *		we see some SACKs or dupacks. It is split off from "Open"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187)  *		mainly to move some processing from the fast path to the slow one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)  * "CWR"	CWND was reduced due to some Congestion Notification event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189)  *		It can be ECN, ICMP source quench, local device congestion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)  * "Recovery"	CWND was reduced, we are fast-retransmitting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191)  * "Loss"	CWND was reduced due to RTO timeout or SACK reneging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)  * tcp_fastretrans_alert() is entered:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194)  * - each incoming ACK, if state is not "Open"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195)  * - when arrived ACK is unusual, namely:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)  *	* SACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)  *	* Duplicate ACK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198)  *	* ECN ECE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200)  * Counting packets in flight is pretty simple.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202)  *	in_flight = packets_out - left_out + retrans_out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)  *	packets_out is SND.NXT-SND.UNA counted in packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)  *	retrans_out is number of retransmitted segments.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208)  *	left_out is the number of segments that have left the network, but are not ACKed yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)  *		left_out = sacked_out + lost_out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)  *     sacked_out: Packets which arrived at the receiver out of order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)  *		   and hence were not cumulatively ACKed. With SACK this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214)  *		   number is simply the amount of SACKed data. Even without SACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)  *		   it is easy to give a pretty reliable estimate of this number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)  *		   by counting duplicate ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)  *       lost_out: Packets lost by the network. TCP has no explicit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)  *		   "loss notification" feedback from the network (for now),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)  *		   which means that this number can only be _guessed_.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)  *		   In fact, it is the heuristic used to predict loss that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)  *		   distinguishes the different algorithms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  *	E.g. after an RTO, when the whole queue is considered lost,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)  *	lost_out = packets_out and in_flight = retrans_out (see the worked example below).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)  *		Essentially, we now have a few algorithms for detecting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)  *		lost packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)  *		If the receiver supports SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)  *		RFC6675/3517: It is the conventional algorithm. A packet is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)  *		considered lost if the number of higher sequence packets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234)  *		SACKed is greater than or equal to the dupACK threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)  *		(reordering). This is implemented in tcp_mark_head_lost and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)  *		tcp_update_scoreboard.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)  *		RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)  *		(2017-) that checks timing instead of counting DUPACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)  *		Essentially a packet is considered lost if it's not S/ACKed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)  *		after RTT + reordering_window, where both metrics are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)  *		dynamically measured and adjusted. This is implemented in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243)  *		tcp_rack_mark_lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)  *		If the receiver does not support SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)  *		NewReno (RFC6582): in Recovery we assume that one segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248)  *		is lost (classic Reno). While we are in Recovery and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)  *		a partial ACK arrives, we assume that one more packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)  *		is lost (NewReno). These heuristics are the same for NewReno
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251)  *		and SACK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253)  * The really tricky (and carefully tuned) part of the algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)  * is hidden in the functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255)  * The first determines the moment _when_ we should reduce CWND and,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256)  * hence, slow down forward transmission. In fact, it determines the moment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)  * when we decide that a hole is caused by loss rather than by reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259)  * tcp_xmit_retransmit_queue() decides _what_ we should retransmit to fill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)  * the holes caused by lost packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262)  * And the most logically complicated part of the algorithm is the undo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)  * heuristics. We detect false retransmits due to both too-early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)  * fast retransmit (reordering) and an underestimated RTO, by analyzing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265)  * timestamps and D-SACKs. When we detect that some segments were
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266)  * retransmitted by mistake and the CWND reduction was wrong, we undo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)  * the window reduction and abort the recovery phase. This logic is hidden
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)  * inside several functions named tcp_try_undo_<something>.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)  */
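
/* A small worked example of the accounting above (illustrative numbers):
 * with packets_out = 10 (SND.NXT - SND.UNA in packets), sacked_out = 3,
 * lost_out = 2 and retrans_out = 1:
 *
 *	left_out  = sacked_out + lost_out                = 3 + 2      = 5
 *	in_flight = packets_out - left_out + retrans_out = 10 - 5 + 1 = 6
 */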
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) /* This function decides when we should leave the Disorder state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)  * and enter the Recovery phase, reducing the congestion window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)  * The main question: may we continue forward transmission
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)  * with the same cwnd?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) static bool tcp_time_to_recover(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 	/* Trick#1: The loss is proven. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	if (tp->lost_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 	/* Not-A-Trick#2: Classic rule... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
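
/* E.g. (illustrative, with the default tp->reordering of 3): without RACK,
 * the classic rule fires once three segments above a hole have been SACKed,
 * since tcp_dupack_heuristics() then returns 4 > 3.  Trick#1 fires as soon
 * as any packet has already been marked lost (tp->lost_out != 0), e.g. by
 * RACK when it is in use.
 */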
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) /* Detect loss in event "A" above by marking head of queue up as lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)  * For RFC3517 SACK, a segment is considered lost if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294)  * has at least tp->reordering SACKed segments above it; "packets" refers to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)  * the maximum number of SACKed segments to pass before reaching this limit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	int cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	/* Use SACK to deduce losses of new sequences sent during recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	const u32 loss_high = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	WARN_ON(packets > tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	skb = tp->lost_skb_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 		/* Head already handled? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 		cnt = tp->lost_cnt_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 		skb = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 		cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 	skb_rbtree_walk_from(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 		/* TODO: do this better; this linear walk is not the most efficient way */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 		tp->lost_skb_hint = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 		tp->lost_cnt_hint = cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 			cnt += tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 		if (cnt > packets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 			tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 		if (mark_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) /* Account newly detected lost packet(s) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 		int sacked_upto = tp->sacked_out - tp->reordering;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 		if (sacked_upto >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 			tcp_mark_head_lost(sk, sacked_upto, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 		else if (fast_rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 			tcp_mark_head_lost(sk, 1, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) }
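
/* Worked example (illustrative): with tp->sacked_out = 5 and
 * tp->reordering = 3, sacked_upto = 2, so tcp_mark_head_lost(sk, 2, 0)
 * walks from the head of the retransmit queue and marks not-yet-SACKed
 * segments lost until it has passed more than two SACKed segments.
 */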
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 	       before(tp->rx_opt.rcv_tsecr, when);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) /* The skb was spuriously retransmitted if the returned timestamp echo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)  * reply predates the skb's transmission time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 				     const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 	       tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) /* Nothing was retransmitted, or the returned timestamp is less
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373)  * than the timestamp of the first retransmission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 	return tp->retrans_stamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) }
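
/* Timestamp example (illustrative values): if the first retransmission was
 * sent with TSval 2000 (saved in tp->retrans_stamp) and an incoming ACK
 * echoes TSecr 1995, the echo predates the retransmission, so the ACK was
 * generated by the original, merely delayed, transmission and the
 * retransmit was spurious.
 */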
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) /* Undo procedures. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) /* We can clear retrans_stamp when there are no retransmissions in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)  * window. It would seem that it is trivially available for us in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385)  * tp->retrans_out; however, that kind of assumption doesn't consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)  * what happens if errors occur when sending the retransmission for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387)  * second time. ...It could be that such a segment has only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)  * TCPCB_EVER_RETRANS set at the present time. It seems that checking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)  * the head skb is enough except for some reneging corner cases that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390)  * are not worth the effort.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392)  * The main reason for all this complexity is that the connection dying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)  * time now depends on the validity of retrans_stamp; in particular,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394)  * that successive retransmissions of a segment must not advance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)  * retrans_stamp under any conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) static bool tcp_any_retrans_done(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 	if (tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 	skb = tcp_rtx_queue_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) static void DBGUNDO(struct sock *sk, const char *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) #if FASTRETRANS_DEBUG > 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	struct inet_sock *inet = inet_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 	if (sk->sk_family == AF_INET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 		pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 			 msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 			 &inet->inet_daddr, ntohs(inet->inet_dport),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 			 tp->snd_cwnd, tcp_left_out(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 			 tp->snd_ssthresh, tp->prior_ssthresh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 			 tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 	else if (sk->sk_family == AF_INET6) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 			 msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 			 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 			 tp->snd_cwnd, tcp_left_out(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 			 tp->snd_ssthresh, tp->prior_ssthresh,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 			 tp->packets_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 	if (unmark_loss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 		struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 		tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 		tcp_clear_all_retrans_hints(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 	if (tp->prior_ssthresh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 		const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 		if (tp->prior_ssthresh > tp->snd_ssthresh) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 			tp->snd_ssthresh = tp->prior_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 			tcp_ecn_withdraw_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 	tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 	tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 	tp->rack.advanced = 1; /* Force RACK to re-exam losses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) static inline bool tcp_may_undo(const struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) /* Try to undo the whole cwnd reduction once recovery completes, if it proved spurious. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) static bool tcp_try_undo_recovery(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	if (tcp_may_undo(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 		int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 		/* Happy end! We did not retransmit anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 		 * or our original transmission succeeded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 		tcp_undo_cwnd_reduction(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 			mib_idx = LINUX_MIB_TCPFULLUNDO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 		NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 	} else if (tp->rack.reo_wnd_persist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 		tp->rack.reo_wnd_persist--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 		/* Hold the old state until something *above* high_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 		 * is ACKed. For Reno this is a MUST to prevent false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 		 * fast retransmits (RFC2582). SACK TCP is safe. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 		if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 			tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 	tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	tp->is_sack_reneg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) static bool tcp_try_undo_dsack(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	if (tp->undo_marker && !tp->undo_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 					       tp->rack.reo_wnd_persist + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 		DBGUNDO(sk, "D-SACK");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 		tcp_undo_cwnd_reduction(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) /* Undo during loss recovery after partial ACK or using F-RTO. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	if (frto_undo || tcp_may_undo(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 		tcp_undo_cwnd_reduction(sk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 		DBGUNDO(sk, "partial loss");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 		if (frto_undo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 			NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 					LINUX_MIB_TCPSPURIOUSRTOS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 		inet_csk(sk)->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 		if (frto_undo || tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 			tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 			tp->is_sack_reneg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) /* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)  * It computes the number of packets to send (sndcnt) based on packets newly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)  * delivered:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)  *   1) If the number of packets in flight is larger than ssthresh, PRR spreads the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)  *	cwnd reductions across a full RTT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)  *   2) Otherwise PRR uses packet conservation to send as much as delivered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)  *      But when the retransmits are acked without further losses, PRR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554)  *      slow starts cwnd up to ssthresh to speed up the recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) static void tcp_init_cwnd_reduction(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 	tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 	tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	tp->prior_cwnd = tp->snd_cwnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 	tp->prr_delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 	tp->prr_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 	tcp_ecn_queue_cwr(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 	int sndcnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 	if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 	tp->prr_delivered += newly_acked_sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 	if (delta < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 			       tp->prior_cwnd - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	} else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 		   FLAG_RETRANS_DATA_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 		sndcnt = min_t(int, delta,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 			       max_t(int, tp->prr_delivered - tp->prr_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 				     newly_acked_sacked) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 		sndcnt = min(delta, newly_acked_sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 	/* Force a fast retransmit upon entering fast recovery */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) }
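
/* PRR worked example (illustrative numbers): prior_cwnd = 10 and
 * ssthresh = 7 (a ~0.7 reduction factor).  While packets in flight exceed
 * ssthresh (delta < 0), the cumulative number of segments we may have sent
 * is DIV_ROUND_UP(ssthresh * prr_delivered, prior_cwnd); once the full
 * prior cwnd of 10 packets has been delivered this is
 * DIV_ROUND_UP(7 * 10, 10) = 7, so cwnd converges to ssthresh over roughly
 * one RTT instead of dropping there in a single step.
 */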
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) static inline void tcp_end_cwnd_reduction(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 	if (inet_csk(sk)->icsk_ca_ops->cong_control)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 		tp->snd_cwnd = tp->snd_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 		tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) void tcp_enter_cwr(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 	tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 		tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 		tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 		tcp_set_ca_state(sk, TCP_CA_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) EXPORT_SYMBOL(tcp_enter_cwr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) static void tcp_try_keep_open(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 	int state = TCP_CA_Open;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 	if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 		state = TCP_CA_Disorder;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 	if (inet_csk(sk)->icsk_ca_state != state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 		tcp_set_ca_state(sk, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 		tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) static void tcp_try_to_open(struct sock *sk, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 	if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 	if (flag & FLAG_ECE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 		tcp_enter_cwr(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 		tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) static void tcp_mtup_probe_failed(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 	icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 	icsk->icsk_mtup.probe_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) static void tcp_mtup_probe_success(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 	/* FIXME: breaks with very large cwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 	tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	tp->snd_cwnd = tp->snd_cwnd *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 		       tcp_mss_to_mtu(sk, tp->mss_cache) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 		       icsk->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 	tp->snd_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	icsk->icsk_mtup.probe_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) }
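
/* Example of the cwnd rescaling above (illustrative numbers): if the probe
 * doubled the effective packet size, e.g. tcp_mss_to_mtu(sk, tp->mss_cache)
 * was 1500 while probe_size is 3000, then snd_cwnd is scaled by 1500/3000,
 * e.g. from 20 down to 10 packets, so the amount of data in flight stays
 * roughly constant in bytes while the per-packet size doubles.
 */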
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) /* Do a simple retransmit without using the backoff mechanisms in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688)  * tcp_timer. This is used for path mtu discovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)  * The socket is already locked here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) void tcp_simple_retransmit(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 	unsigned int mss = tcp_current_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 		if (tcp_skb_seglen(skb) > mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 			tcp_mark_skb_lost(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 	tcp_clear_retrans_hints_partial(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 	if (!tp->lost_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 	if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 		tcp_limit_reno_sacked(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 	/* Don't muck with the congestion window here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 	 * The reason is that we do not increase the amount of _data_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 	 * in the network, but the units changed and the effective
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 	 * cwnd/ssthresh are really reduced now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 	if (icsk->icsk_ca_state != TCP_CA_Loss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 		tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 		tp->snd_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 		tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 		tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 		tcp_set_ca_state(sk, TCP_CA_Loss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 	tcp_xmit_retransmit_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) EXPORT_SYMBOL(tcp_simple_retransmit);
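
/* Illustrative scenario for tcp_simple_retransmit(): an ICMP "fragmentation
 * needed" shrinks the path MTU so that tcp_current_mss() drops from, say,
 * 1460 to 1260 bytes.  Every skb in the retransmit queue whose seglen is
 * still 1460 gets marked lost and re-sent in smaller segments, but the
 * congestion window itself is not touched here, because the amount of data
 * in the network did not grow; only the packetization changed.
 */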
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) void tcp_enter_recovery(struct sock *sk, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 	int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 	if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 		mib_idx = LINUX_MIB_TCPRENORECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 		mib_idx = LINUX_MIB_TCPSACKRECOVERY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 	NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 	tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 	tcp_init_undo(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 	if (!tcp_in_cwnd_reduction(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 		if (!ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 		tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 	tcp_set_ca_state(sk, TCP_CA_Recovery);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753)  * recovered or spurious. Otherwise retransmits more on partial ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 			     int *rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 	bool recovered = !before(tp->snd_una, tp->high_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 	if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 	    tcp_try_undo_loss(sk, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) 	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 		/* Step 3.b. A timeout is spurious if not all data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 		 * lost, i.e., never-retransmitted data are (s)acked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 		if ((flag & FLAG_ORIG_SACK_ACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 		    tcp_try_undo_loss(sk, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 		if (after(tp->snd_nxt, tp->high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 			if (flag & FLAG_DATA_SACKED || num_dupack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 				tp->frto = 0; /* Step 3.a. loss was real */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 			tp->high_seq = tp->snd_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 			/* Step 2.b. Try to send new data (but deferred until cwnd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 			 * is updated in tcp_ack()). Otherwise fall back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 			 * the conventional recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 			if (!tcp_write_queue_empty(sk) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 			    after(tcp_wnd_end(tp), tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 				*rexmit = REXMIT_NEW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 			tp->frto = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 	if (recovered) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 		tcp_try_undo_recovery(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 	if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 		/* A Reno dupACK means the new data sent in F-RTO step 2.b above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 		 * was delivered. Lower the inflight to clock out (re)transmissions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 		if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 			tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 		else if (flag & FLAG_SND_UNA_ADVANCED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 			tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 	*rexmit = REXMIT_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) }
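
/* One possible F-RTO sequence (illustrative): an RTO fires, the head segment
 * is retransmitted and tp->frto is typically set on entering Loss.  If a
 * later ACK SACKs never-retransmitted data (FLAG_ORIG_SACK_ACKED), the
 * timeout is judged spurious and the cwnd reduction is undone (step 3.b).
 * If instead, after new data has been sent past the old high_seq (step 2.b),
 * a dupack or a SACK of retransmitted data arrives, tp->frto is cleared and
 * conventional loss recovery proceeds (step 3.a).
 */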
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) static bool tcp_force_fast_retransmit(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	return after(tcp_highest_sack_seq(tp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 		     tp->snd_una + tp->reordering * tp->mss_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) }
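
/* E.g. (illustrative): with tp->reordering = 3 and tp->mss_cache = 1000,
 * this returns true only when the highest SACKed sequence lies more than
 * 3000 bytes beyond snd_una, which is roughly three full-sized segments
 * above the hole (the classic dupACK threshold expressed in bytes).
 */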
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) /* Undo during fast recovery after partial ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 				 bool *do_lost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 	if (tp->undo_marker && tcp_packet_delayed(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 		/* Plain luck! The hole was filled with a delayed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 		 * packet, rather than with a retransmit. Check reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 		tcp_check_sack_reordering(sk, prior_snd_una, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 		/* We are getting evidence that the reordering degree is higher
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 		 * than we realized. If there are no retransmits out then we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 		 * can undo. Otherwise we clock out new packets but do not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 		 * mark more packets lost or retransmit more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 		if (tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 		if (!tcp_any_retrans_done(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 			tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 		DBGUNDO(sk, "partial recovery");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 		tcp_undo_cwnd_reduction(sk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 		tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 		/* Partial ACK arrived. Force fast retransmit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 		*do_lost = tcp_force_fast_retransmit(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 	if (tcp_rtx_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 	if (unlikely(tcp_is_reno(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 		tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 	} else if (tcp_is_rack(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 		u32 prior_retrans = tp->retrans_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 		if (tcp_rack_mark_lost(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 			*ack_flag &= ~FLAG_SET_XMIT_TIMER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 		if (prior_retrans > tp->retrans_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 			*ack_flag |= FLAG_LOST_RETRANS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) /* Process an event which can update packets-in-flight non-trivially.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870)  * The main goal of this function is to calculate a new estimate of left_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871)  * taking into account both packets sitting in the receiver's buffer and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872)  * packets lost by the network.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874)  * Besides that it updates the congestion state when packet loss or ECN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875)  * is detected. But it does not reduce the cwnd, it is done by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876)  * congestion control later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878)  * It does _not_ decide what to send; that is done in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879)  * tcp_xmit_retransmit_queue().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 				  int num_dupack, int *ack_flag, int *rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 	int fast_rexmit = 0, flag = *ack_flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 	bool ece_ack = flag & FLAG_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 	bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 				      tcp_force_fast_retransmit(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 	if (!tp->packets_out && tp->sacked_out)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 		tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	/* Now the state machine starts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 	 * A. ECE, hence prohibit cwnd undoing; the reduction is required. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 	if (ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 		tp->prior_ssthresh = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 	/* B. In all the states check for reneging SACKs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 	if (tcp_check_sack_reneging(sk, flag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 	/* C. Check consistency of the current state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 	tcp_verify_left_out(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 	/* D. Check state exit conditions. State can be terminated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 	 *    when high_seq is ACKed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 	if (icsk->icsk_ca_state == TCP_CA_Open) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 		WARN_ON(tp->retrans_out != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 		tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 	} else if (!before(tp->snd_una, tp->high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 		switch (icsk->icsk_ca_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 		case TCP_CA_CWR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 			/* CWR is to be held until something *above* high_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 			 * is ACKed, for the CWR bit to reach the receiver. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 			if (tp->snd_una != tp->high_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 				tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 				tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 		case TCP_CA_Recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 			if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 				tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 			if (tcp_try_undo_recovery(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 			tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 	/* E. Process state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	switch (icsk->icsk_ca_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 	case TCP_CA_Recovery:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 			if (tcp_is_reno(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 				tcp_add_reno_sack(sk, num_dupack, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 		} else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 		if (tcp_try_undo_dsack(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 			tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 		if (icsk->icsk_ca_state != TCP_CA_Recovery) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 			if (!tcp_time_to_recover(sk, flag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 			/* Undo reverts the recovery state. If loss is evident,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 			 * start a new recovery (e.g. reordering then loss).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 			tcp_enter_recovery(sk, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 	case TCP_CA_Loss:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 		tcp_process_loss(sk, flag, num_dupack, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 		tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 		      (*ack_flag & FLAG_LOST_RETRANS)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) 		/* Change state if cwnd is undone or retransmits are lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 		if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 			if (flag & FLAG_SND_UNA_ADVANCED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 				tcp_reset_reno_sack(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 			tcp_add_reno_sack(sk, num_dupack, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 			tcp_try_undo_dsack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) 		tcp_identify_packet_loss(sk, ack_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 		if (!tcp_time_to_recover(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 			tcp_try_to_open(sk, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 		/* MTU probe failure: don't reduce cwnd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 		if (icsk->icsk_ca_state < TCP_CA_CWR &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 		    icsk->icsk_mtup.probe_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 		    tp->snd_una == tp->mtu_probe.probe_seq_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) 			tcp_mtup_probe_failed(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 			/* Restores the reduction we did in tcp_mtup_probe() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 			tp->snd_cwnd++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 			tcp_simple_retransmit(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) 		/* Otherwise enter Recovery state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 		tcp_enter_recovery(sk, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 		fast_rexmit = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) 	if (!tcp_is_rack(sk) && do_lost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 		tcp_update_scoreboard(sk, fast_rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 	*rexmit = REXMIT_LOST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) }
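
The alert path above folds duplicate ACKs, SACK information and RACK timers into the
decision to enter Recovery. As a minimal, standalone illustration of just the classic
RFC 5681 trigger, not the tcp_time_to_recover() heuristics used here, a three-dupack
counter is enough; all names below are illustrative:

#include <stdbool.h>

#define DUPACK_THRESH	3	/* classic three-dupack threshold */

struct fr_state {
	unsigned int dupacks;	/* consecutive duplicate ACKs seen */
};

/* Returns true once enough dupacks accumulate to justify a fast
 * retransmit; any ACK that advances snd_una resets the counter.
 */
static bool should_fast_retransmit(struct fr_state *st,
				   bool ack_advanced, bool is_dupack)
{
	if (ack_advanced) {
		st->dupacks = 0;
		return false;
	}
	if (is_dupack)
		st->dupacks++;
	return st->dupacks >= DUPACK_THRESH;
}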
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 	u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 	if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 		/* If the remote keeps returning delayed ACKs, eventually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 		 * the min filter would pick it up and overestimate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 		 * prop. delay when it expires. Skip suspected delayed ACKs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 	minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 			   rtt_us ? : jiffies_to_usecs(1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) }
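
tcp_update_rtt_min() feeds accepted samples into a windowed running-min filter, so a
route change that raises the propagation delay is eventually noticed once the old
minimum ages out. A simplified single-slot sketch of that idea (the kernel's
lib/win_minmax.c keeps three samples; the names below are illustrative):

#include <stdint.h>

struct rtt_min {
	uint32_t rtt_us;	/* current windowed minimum */
	uint32_t stamp;		/* tick at which it was recorded */
};

/* Accept a sample if it is a new minimum, or if the stored minimum is
 * older than the window length and must be replaced anyway.
 */
static void rtt_min_update(struct rtt_min *m, uint32_t now,
			   uint32_t wlen, uint32_t sample_us)
{
	if (sample_us <= m->rtt_us ||
	    (uint32_t)(now - m->stamp) > wlen) {
		m->rtt_us = sample_us;
		m->stamp = now;
	}
}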
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 			       long seq_rtt_us, long sack_rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 			       long ca_rtt_us, struct rate_sample *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 	/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 	 * broken middle-boxes or peers may corrupt TS-ECR fields. But
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 	 * Karn's algorithm forbids taking RTT if some retransmitted data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 	 * is acked (RFC6298).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 	if (seq_rtt_us < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 		seq_rtt_us = sack_rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 	/* RTTM Rule: A TSecr value received in a segment is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) 	 * update the averaged RTT measurement only if the segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 	 * acknowledges some new data, i.e., only if it advances the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 	 * left edge of the send window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 	    flag & FLAG_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 			if (!delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 				delta = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) 			seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 			ca_rtt_us = seq_rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) 	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) 	if (seq_rtt_us < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) 	/* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) 	 * always taken together with ACK, SACK, or TS-opts. Any negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) 	 * values will be skipped with the seq_rtt_us < 0 check above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) 	tcp_update_rtt_min(sk, ca_rtt_us, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 	tcp_rtt_estimator(sk, seq_rtt_us);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 	tcp_set_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 	/* RFC6298: only reset backoff on valid RTT measurement. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 	inet_csk(sk)->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) }
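
The TSecr fallback above turns an echoed timestamp into microseconds only when Karn's
rule leaves no usable ACK-timing sample. A rough user-space version of that conversion,
assuming a 1000 Hz timestamp clock (the TCP_TS_HZ value in this tree):

#include <stdint.h>

#define TS_HZ		1000u		/* assumed timestamp granularity */
#define USEC_PER_SEC_U	1000000u

/* Returns the RTT in microseconds derived from the echoed timestamp,
 * or -1 if the delta is implausibly large (likely a corrupt TSecr).
 */
static long rtt_from_tsecr(uint32_t now_ts, uint32_t rcv_tsecr)
{
	uint32_t delta = now_ts - rcv_tsecr;	/* timestamp ticks */

	if (delta >= INT32_MAX / (USEC_PER_SEC_U / TS_HZ))
		return -1;
	if (!delta)
		delta = 1;	/* sub-tick RTT, round up to one tick */
	return (long)delta * (USEC_PER_SEC_U / TS_HZ);
}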
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 	struct rate_sample rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 	long rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) 	if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 		rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 	tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us, &rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) 	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) 	tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) /* Restart timer after forward progress on connection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)  * RFC2988 recommends to restart timer to now+rto.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) void tcp_rearm_rto(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) 	/* If the retrans timer is currently being used by Fast Open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 	 * for SYN-ACK retrans purpose, stay put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 	if (rcu_access_pointer(tp->fastopen_rsk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) 	if (!tp->packets_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) 		u32 rto = inet_csk(sk)->icsk_rto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) 		/* Offset the time elapsed after installing regular RTO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) 		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) 			s64 delta_us = tcp_rto_delta_us(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) 			/* delta_us may not be positive if the socket is locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) 			 * when the retrans timer fires and is rescheduled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) 			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) 		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) 				     TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) }
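
When a loss-probe or reorder timer was armed first, the code above rearms the RTO only
for the portion that has not yet elapsed. A tiny sketch of that clamp (hypothetical
helper, not kernel API):

/* Remaining time to arm: the full RTO minus what has already passed,
 * never less than one time unit so the timer always fires.
 */
static long remaining_rto(long rto, long elapsed)
{
	long delta = rto - elapsed;

	return delta > 0 ? delta : 1;
}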
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) static void tcp_set_xmit_timer(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) 	if (!tcp_schedule_loss_probe(sk, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) 		tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) /* If we get here, the whole TSO packet has not been acked. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) 	u32 packets_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) 	BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) 	packets_acked = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) 	if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) 	packets_acked -= tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) 	if (packets_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) 		BUG_ON(tcp_skb_pcount(skb) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) 		BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) 	return packets_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) }
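
For a partially ACKed TSO chunk, the number of acked sub-segments is the drop in pcount
caused by trimming the acked bytes off the head of the skb. A standalone way to compute
the same figure from byte counts (illustrative only, not the tcp_trim_head() path):

#include <stdint.h>

/* Segments covered by an skb of skb_len bytes before and after
 * acked_bytes are trimmed from its head, rounded up to whole MSS units;
 * the difference is what this ACK newly covered.
 * Assumes 0 < acked_bytes < skb_len and mss > 0.
 */
static unsigned int tso_segs_acked(uint32_t skb_len, uint32_t acked_bytes,
				   unsigned int mss)
{
	unsigned int before = (skb_len + mss - 1) / mss;
	unsigned int after = (skb_len - acked_bytes + mss - 1) / mss;

	return before - after;
}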
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) 			   u32 prior_snd_una)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) 	const struct skb_shared_info *shinfo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) 	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) 	if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) 	shinfo = skb_shinfo(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) 	if (!before(shinfo->tskey, prior_snd_una) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) 	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) 		tcp_skb_tsorted_save(skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) 			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) 		} tcp_skb_tsorted_restore(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) /* Remove acknowledged frames from the retransmission queue. If our packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163)  * is before the ack sequence we can discard it as it's confirmed to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164)  * arrived at the other end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) 			       u32 prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) 			       struct tcp_sacktag_state *sack, bool ece_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) 	u64 first_ackt, last_ackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) 	u32 prior_sacked = tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) 	u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) 	struct sk_buff *skb, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) 	bool fully_acked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) 	long sack_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) 	long seq_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) 	long ca_rtt_us = -1L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) 	u32 pkts_acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) 	u32 last_in_flight = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) 	bool rtt_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) 	int flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) 	first_ackt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) 	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) 		const u32 start_seq = scb->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) 		u8 sacked = scb->sacked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) 		u32 acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) 		/* Determine how many packets and which bytes were acked, TSO or otherwise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) 		if (after(scb->end_seq, tp->snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) 			if (tcp_skb_pcount(skb) == 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) 			    !after(tp->snd_una, scb->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) 			acked_pcount = tcp_tso_acked(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) 			if (!acked_pcount)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) 			fully_acked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) 			acked_pcount = tcp_skb_pcount(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) 		if (unlikely(sacked & TCPCB_RETRANS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) 			if (sacked & TCPCB_SACKED_RETRANS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) 				tp->retrans_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) 			flag |= FLAG_RETRANS_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) 		} else if (!(sacked & TCPCB_SACKED_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) 			last_ackt = tcp_skb_timestamp_us(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) 			WARN_ON_ONCE(last_ackt == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) 			if (!first_ackt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) 				first_ackt = last_ackt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) 			if (before(start_seq, reord))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) 				reord = start_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) 			if (!after(scb->end_seq, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) 				flag |= FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) 		if (sacked & TCPCB_SACKED_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 			tp->sacked_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 		} else if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) 			tcp_count_delivered(tp, acked_pcount, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) 			if (!tcp_skb_spurious_retrans(tp, skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) 				tcp_rack_advance(tp, sacked, scb->end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) 						 tcp_skb_timestamp_us(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) 		if (sacked & TCPCB_LOST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) 			tp->lost_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) 		tp->packets_out -= acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) 		pkts_acked += acked_pcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) 		tcp_rate_skb_delivered(sk, skb, sack->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) 		/* Initial outgoing SYNs get put onto the write_queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) 		 * just like anything else we transmit.  It is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) 		 * true data, and if we misinform our callers that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) 		 * this ACK acks real data, we will erroneously exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) 		 * connection startup slow start one packet too
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) 		 * quickly.  This is severely frowned upon behavior.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) 		if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) 			flag |= FLAG_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) 			flag |= FLAG_SYN_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) 			tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) 		if (!fully_acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) 		tcp_ack_tstamp(sk, skb, prior_snd_una);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) 		next = skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) 		if (unlikely(skb == tp->retransmit_skb_hint))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) 			tp->retransmit_skb_hint = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) 		if (unlikely(skb == tp->lost_skb_hint))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) 			tp->lost_skb_hint = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) 		tcp_highest_sack_replace(sk, skb, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) 		tcp_rtx_queue_unlink_and_free(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) 	if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) 		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) 	if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) 		tp->snd_up = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) 		tcp_ack_tstamp(sk, skb, prior_snd_una);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) 		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) 			flag |= FLAG_SACK_RENEGING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) 	if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) 		seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) 		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) 		if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) 		    last_in_flight && !prior_sacked && fully_acked &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) 		    sack->rate->prior_delivered + 1 == tp->delivered &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) 		    !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) 			/* Conservatively mark a delayed ACK. It's typically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) 			 * from a lone runt packet over the round trip to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) 			 * a receiver w/o out-of-order or CE events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) 			flag |= FLAG_ACK_MAYBE_DELAYED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) 	if (sack->first_sackt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) 		sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) 		ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) 					ca_rtt_us, sack->rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) 	if (flag & FLAG_ACKED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) 		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) 		if (unlikely(icsk->icsk_mtup.probe_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) 			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) 			tcp_mtup_probe_success(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) 		if (tcp_is_reno(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) 			tcp_remove_reno_sacks(sk, pkts_acked, ece_ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) 			/* In the non-SACK case, if any of the cumulatively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) 			 * ACKed segments was retransmitted, we cannot confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) 			 * that progress came from the original transmissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) 			 * (there are no TCPCB_SACKED_ACKED bits), even though
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) 			 * some of the packets may never have been retransmitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) 			if (flag & FLAG_RETRANS_DATA_ACKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) 				flag &= ~FLAG_ORIG_SACK_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) 			int delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) 			/* Non-retransmitted hole got filled? That's reordering */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) 			if (before(reord, prior_fack))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) 				tcp_check_sack_reordering(sk, reord, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) 			delta = prior_sacked - tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) 			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) 		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) 						    tcp_skb_timestamp_us(skb))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) 		/* Do not re-arm RTO if the sack RTT is measured from data sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) 		 * after the head was last (re)transmitted. Otherwise the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) 		 * timeout may continue to extend in loss recovery.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) 		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) 	if (icsk->icsk_ca_ops->pkts_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) 		struct ack_sample sample = { .pkts_acked = pkts_acked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) 					     .rtt_us = sack->rate->rtt_us,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) 					     .in_flight = last_in_flight };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) 		icsk->icsk_ca_ops->pkts_acked(sk, &sample);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) #if FASTRETRANS_DEBUG > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) 	WARN_ON((int)tp->sacked_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) 	WARN_ON((int)tp->lost_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) 	WARN_ON((int)tp->retrans_out < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) 	if (!tp->packets_out && tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) 		icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) 		if (tp->lost_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) 			pr_debug("Leak l=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) 				 tp->lost_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) 			tp->lost_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 		if (tp->sacked_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) 			pr_debug("Leak s=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 				 tp->sacked_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) 			tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) 		if (tp->retrans_out) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) 			pr_debug("Leak r=%u %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) 				 tp->retrans_out, icsk->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) 			tp->retrans_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) 	return flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) static void tcp_ack_probe(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) 	struct sk_buff *head = tcp_send_head(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) 	/* Has a usable window opened? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) 	if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) 	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) 		icsk->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) 		icsk->icsk_probes_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) 		/* Socket must be woken up by a subsequent tcp_data_snd_check().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) 		 * This function is not meant for arbitrary use!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) 		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) 		when = tcp_clamp_probe0_to_user_timeout(sk, when);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) 		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) 	return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) 		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) /* Decide whether to run the increase function of congestion control. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) 	/* If reordering is high then always grow cwnd whenever data is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) 	 * delivered regardless of its ordering. Otherwise stay conservative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) 	 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) 	 * new SACK or ECE mark may first advance cwnd here and later reduce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) 	 * cwnd in tcp_fastretrans_alert() based on more states.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) 	if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) 		return flag & FLAG_FORWARD_PROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) 	return flag & FLAG_DATA_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) /* The "ultimate" congestion control function that aims to replace the rigid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419)  * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420)  * It's called toward the end of processing an ACK with precise rate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421)  * information. All transmission or retransmission are delayed afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) 			     int flag, const struct rate_sample *rs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) 	if (icsk->icsk_ca_ops->cong_control) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) 		icsk->icsk_ca_ops->cong_control(sk, rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) 	if (tcp_in_cwnd_reduction(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) 		/* Reduce cwnd if state mandates */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) 		tcp_cwnd_reduction(sk, acked_sacked, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) 	} else if (tcp_may_raise_cwnd(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) 		/* Advance cwnd if state allows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) 		tcp_cong_avoid(sk, ack, acked_sacked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) 	tcp_update_pacing_rate(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) /* Check that window update is acceptable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444)  * The function assumes that snd_una<=ack<=snd_next.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) static inline bool tcp_may_update_window(const struct tcp_sock *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) 					const u32 ack, const u32 ack_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) 					const u32 nwin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) 	return	after(ack, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) 		after(ack_seq, tp->snd_wl1) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) }
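
The test above, like the rest of this file, leans on after()/before(), which compare
32-bit sequence numbers modulo 2^32 via a signed difference. A self-contained
equivalent of that idiom:

#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe sequence comparison: s1 is "before" s2 if the signed 32-bit
 * difference is negative, so e.g. 0xfffffff0 is before 0x00000010 even
 * though it is numerically larger.
 */
static bool seq_before(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) < 0;
}

static bool seq_after(uint32_t s1, uint32_t s2)
{
	return seq_before(s2, s1);
}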
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) /* If we update tp->snd_una, also update tp->bytes_acked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) 	u32 delta = ack - tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) 	sock_owned_by_me((struct sock *)tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) 	tp->bytes_acked += delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) 	tp->snd_una = ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) /* If we update tp->rcv_nxt, also update tp->bytes_received */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) 	u32 delta = seq - tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) 	sock_owned_by_me((struct sock *)tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) 	tp->bytes_received += delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) 	WRITE_ONCE(tp->rcv_nxt, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) /* Update our send window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)  * The window update algorithm described in RFC793/RFC1122 (used in linux-2.2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)  * and in FreeBSD; NetBSD's is even worse) is wrong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) 				 u32 ack_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) 	int flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) 	u32 nwin = ntohs(tcp_hdr(skb)->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) 	if (likely(!tcp_hdr(skb)->syn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) 		nwin <<= tp->rx_opt.snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) 		flag |= FLAG_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) 		tcp_update_wl(tp, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) 		if (tp->snd_wnd != nwin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) 			tp->snd_wnd = nwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) 			/* Note: this is the only place where the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) 			 * fast path is recovered for the sending side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) 			tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) 			tcp_fast_path_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) 			if (!tcp_write_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) 				tcp_slow_start_after_idle_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) 			if (nwin > tp->max_window) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) 				tp->max_window = nwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) 				tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) 	tcp_snd_una_update(tp, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) 	return flag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) }
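
The raw 16-bit window from the header is shifted by the negotiated scale on everything
except the SYN itself, so for example a wire value of 1024 with a window scale of 7
advertises 1024 << 7 = 131072 bytes. A minimal sketch of that arithmetic:

#include <stdint.h>

/* Expand the advertised 16-bit window by the negotiated window scale;
 * the SYN segment itself is never scaled.
 */
static uint32_t scaled_window(uint16_t raw_win, uint8_t wscale, int is_syn)
{
	uint32_t nwin = raw_win;

	if (!is_syn)
		nwin <<= wscale;
	return nwin;
}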
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) 				   u32 *last_oow_ack_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) 	if (*last_oow_ack_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) 		s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) 		if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) 			NET_INC_STATS(net, mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) 			return true;	/* rate-limited: don't send yet! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) 	*last_oow_ack_time = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) 	return false;	/* not rate-limited: go ahead, send dupack now! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) }
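
The helper above allows at most one out-of-window response per
sysctl_tcp_invalid_ratelimit interval and remembers when the last one was sent. A
standalone model of the same pattern, in generic tick units and with hypothetical
names:

#include <stdbool.h>
#include <stdint.h>

/* Allow at most one response per 'limit' ticks, tracking the time of the
 * last one that was actually sent.
 */
static bool oow_rate_limited(uint32_t *last_tick, uint32_t now, uint32_t limit)
{
	if (*last_tick) {
		int32_t elapsed = (int32_t)(now - *last_tick);

		if (elapsed >= 0 && (uint32_t)elapsed < limit)
			return true;	/* too soon: suppress this response */
	}
	*last_tick = now;
	return false;			/* ok to respond, remember the time */
}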
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) /* Return true if we're currently rate-limiting out-of-window ACKs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536)  * thus shouldn't send a dupack right now. We rate-limit dupacks in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)  * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538)  * attacks that send repeated SYNs or ACKs for the same connection. To
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539)  * do this, we do not send a duplicate SYNACK or ACK if the remote
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540)  * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) 			  int mib_idx, u32 *last_oow_ack_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) 	/* Data packets without SYNs are not likely part of an ACK loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) 	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) 	    !tcp_hdr(skb)->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) 	return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) /* RFC 5961 7 [ACK Throttling] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) 	/* unprotected vars; we don't care about overwrites */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) 	static u32 challenge_timestamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) 	static unsigned int challenge_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) 	struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) 	u32 count, now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) 	/* First check our per-socket dupack rate limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) 	if (__tcp_oow_rate_limited(net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) 				   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) 				   &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) 	/* Then check host-wide RFC 5961 rate limit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) 	now = jiffies / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) 	if (now != challenge_timestamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) 		u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) 		u32 half = (ack_limit + 1) >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) 		challenge_timestamp = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) 		WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) 	count = READ_ONCE(challenge_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) 	if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) 		WRITE_ONCE(challenge_count, count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) 		NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) 		tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) }
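
The host-wide RFC 5961 limiter refills a randomized budget once per second and spends
one unit per challenge ACK; the randomization makes it harder for an off-path attacker
to count challenge ACKs. A rough single-threaded model (assumes ack_limit > 0;
prandom_u32_max() is stood in for by rand() here):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

struct challenge_limiter {
	uint32_t window_start;	/* second in which the budget was refilled */
	uint32_t budget;	/* challenge ACKs still allowed this second */
};

/* Refill a randomized budget when a new second starts, then spend one
 * unit per challenge ACK until the budget runs out.
 */
static bool may_send_challenge_ack(struct challenge_limiter *cl,
				   uint32_t now_sec, uint32_t ack_limit)
{
	if (now_sec != cl->window_start) {
		uint32_t half = (ack_limit + 1) / 2;

		cl->window_start = now_sec;
		/* random budget in [half, half + ack_limit) */
		cl->budget = half + (uint32_t)(rand() % ack_limit);
	}
	if (cl->budget > 0) {
		cl->budget--;
		return true;
	}
	return false;
}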
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) static void tcp_store_ts_recent(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) 	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) 	if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) 		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) 		 * extra check below makes sure this can only happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) 		 * for pure ACK frames.  -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) 		 * Not only that; it also occurs for expired timestamps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) 		if (tcp_paws_check(&tp->rx_opt, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) 			tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) /* This routine deals with acks during a TLP episode and ends an episode by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608)  * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) 	if (before(ack, tp->tlp_high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) 	if (!tp->tlp_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) 		/* TLP of new data has been acknowledged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) 		tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) 	} else if (flag & FLAG_DSACKING_ACK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) 		/* This DSACK means original and TLP probe arrived; no loss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) 		tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) 	} else if (after(ack, tp->tlp_high_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) 		/* ACK advances: there was a loss, so reduce cwnd. Reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) 		 * tlp_high_seq in tcp_init_cwnd_reduction()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) 		tcp_init_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) 		tcp_set_ca_state(sk, TCP_CA_CWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) 		tcp_end_cwnd_reduction(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) 		tcp_try_keep_open(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) 		NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) 				LINUX_MIB_TCPLOSSPROBERECOVERY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) 	} else if (!(flag & (FLAG_SND_UNA_ADVANCED |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) 			     FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) 		/* Pure dupack: original and TLP probe arrived; no loss */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) 		tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) }
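
The branches above boil down to a small decision table for ending a tail-loss-probe
episode. Condensed into a standalone function with illustrative names (the boolean
parameters stand in for the tlp_retrans field and the FLAG_* checks):

#include <stdbool.h>
#include <stdint.h>

enum tlp_outcome { TLP_KEEP_WAITING, TLP_NO_LOSS, TLP_LOSS_DETECTED };

/* Mirrors the branch order above: the episode ends without loss if the
 * probe carried new data, a DSACK shows both copies arrived, or the ACK
 * is a pure dupack; it ends with a cwnd reduction if the ACK advances
 * past tlp_high_seq with none of that evidence.
 */
static enum tlp_outcome tlp_ack_outcome(uint32_t ack, uint32_t tlp_high_seq,
					bool tlp_was_retrans,
					bool dsack, bool pure_dupack)
{
	if ((int32_t)(ack - tlp_high_seq) < 0)
		return TLP_KEEP_WAITING;
	if (!tlp_was_retrans)
		return TLP_NO_LOSS;
	if (dsack)
		return TLP_NO_LOSS;
	if ((int32_t)(ack - tlp_high_seq) > 0)
		return TLP_LOSS_DETECTED;
	if (pure_dupack)
		return TLP_NO_LOSS;
	return TLP_KEEP_WAITING;
}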
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) 	const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) 	if (icsk->icsk_ca_ops->in_ack_event)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) 		icsk->icsk_ca_ops->in_ack_event(sk, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) /* Congestion control has updated the cwnd already. So if we're in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649)  * loss recovery, we now do any new sends (for FRTO) or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650)  * retransmits (for CA_Loss or CA_Recovery) that make sense.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) static void tcp_xmit_recovery(struct sock *sk, int rexmit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) 	if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) 	if (unlikely(rexmit == REXMIT_NEW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) 		__tcp_push_pending_frames(sk, tcp_current_mss(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) 					  TCP_NAGLE_OFF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) 		if (after(tp->snd_nxt, tp->high_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) 		tp->frto = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) 	tcp_xmit_retransmit_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) /* Returns the number of packets newly acked or sacked by the current ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) 	const struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) 	u32 delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) 	delivered = tp->delivered - prior_delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) 	NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) 	if (flag & FLAG_ECE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) 		NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) 	return delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) /* This routine deals with incoming acks, but not outgoing ones. */
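/* Return convention (a summary of the code below): 1 when the ACK was
 * processed, 0 for an old ACK that at most carried SACK/D-SACK information,
 * and -1 when the ACK is discarded outright (too old to be trusted, or
 * acking data we have not sent yet).
 */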
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) 	struct tcp_sacktag_state sack_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) 	struct rate_sample rs = { .prior_delivered = 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) 	u32 prior_snd_una = tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) 	bool is_sack_reneg = tp->is_sack_reneg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) 	int num_dupack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) 	int prior_packets = tp->packets_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) 	u32 delivered = tp->delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) 	u32 lost = tp->lost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) 	u32 prior_fack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) 	sack_state.first_sackt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) 	sack_state.rate = &rs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) 	sack_state.sack_delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) 	/* We very likely will need to access rtx queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) 	prefetch(sk->tcp_rtx_queue.rb_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) 	/* If the ack is older than previous acks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) 	 * then we can probably ignore it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) 	if (before(ack, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) 		/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) 		if (before(ack, prior_snd_una - tp->max_window)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) 			if (!(flag & FLAG_NO_CHALLENGE_ACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) 				tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) 		goto old_ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) 	/* If the ack acknowledges something we haven't sent yet, discard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) 	 * this segment (RFC 793, Section 3.9).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) 	if (after(ack, tp->snd_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) 	if (after(ack, prior_snd_una)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) 		flag |= FLAG_SND_UNA_ADVANCED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) 		icsk->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) #if IS_ENABLED(CONFIG_TLS_DEVICE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) 		if (static_branch_unlikely(&clean_acked_data_enabled.key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) 			if (icsk->icsk_clean_acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) 				icsk->icsk_clean_acked(sk, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) 	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) 	rs.prior_in_flight = tcp_packets_in_flight(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) 	/* ts_recent update must be made after we are sure that the packet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) 	 * is in window.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) 	if (flag & FLAG_UPDATE_TS_RECENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) 		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) 	if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) 	    FLAG_SND_UNA_ADVANCED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) 		/* Window is constant, pure forward advance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) 		 * No more checks are required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) 		 * Note, we use the fact that SND.UNA>=SND.WL2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) 		tcp_update_wl(tp, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) 		tcp_snd_una_update(tp, ack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) 		flag |= FLAG_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) 		u32 ack_ev_flags = CA_ACK_SLOWPATH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) 		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) 			flag |= FLAG_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) 		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) 		if (TCP_SKB_CB(skb)->sacked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) 							&sack_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) 		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) 			flag |= FLAG_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) 			ack_ev_flags |= CA_ACK_ECE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) 		if (sack_state.sack_delivered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) 			tcp_count_delivered(tp, sack_state.sack_delivered,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) 					    flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) 		if (flag & FLAG_WIN_UPDATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) 			ack_ev_flags |= CA_ACK_WIN_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) 		tcp_in_ack_event(sk, ack_ev_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) 	/* This is a deviation from RFC3168 since it states that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) 	 * "When the TCP data sender is ready to set the CWR bit after reducing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) 	 * the congestion window, it SHOULD set the CWR bit only on the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) 	 * new data packet that it transmits."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) 	 * We accept CWR on pure ACKs to be more robust
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) 	 * with widely-deployed TCP implementations that do this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) 	tcp_ecn_accept_cwr(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) 	/* We passed data and got it acked, remove any soft error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) 	 * log. Something worked...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) 	sk->sk_err_soft = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) 	icsk->icsk_probes_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) 	tp->rcv_tstamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) 	if (!prior_packets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) 		goto no_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) 	/* See if we can take anything off of the retransmit queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) 	flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) 				    flag & FLAG_ECE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) 	tcp_rack_update_reo_wnd(sk, &rs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) 	if (tp->tlp_high_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) 		tcp_process_tlp_ack(sk, ack, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) 	if (tcp_ack_is_dubious(sk, flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) 		if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) 			num_dupack = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) 			/* Consider if pure acks were aggregated in tcp_add_backlog() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) 			if (!(flag & FLAG_DATA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) 				num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) 		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) 				      &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) 	/* If needed, reset the TLP/RTO timer when RACK hasn't set it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) 	if (flag & FLAG_SET_XMIT_TIMER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) 		tcp_set_xmit_timer(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) 		sk_dst_confirm(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) 	delivered = tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) 	lost = tp->lost - lost;			/* freshly marked lost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) 	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) 	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) 	tcp_xmit_recovery(sk, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) no_queue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) 	if (flag & FLAG_DSACKING_ACK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) 		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) 				      &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) 		tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) 	/* If this ack opens up a zero window, clear backoff.  It was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) 	 * being used to time the probes, and is probably far higher than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) 	 * it needs to be for normal retransmission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) 	tcp_ack_probe(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) 	if (tp->tlp_high_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) 		tcp_process_tlp_ack(sk, ack, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) old_ack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) 	/* If data was SACKed, tag it and see if we should send more data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) 	 * If data was DSACKed, see if we can undo a cwnd reduction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) 	if (TCP_SKB_CB(skb)->sacked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) 						&sack_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) 		tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) 				      &rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) 		tcp_newly_delivered(sk, delivered, flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) 		tcp_xmit_recovery(sk, rexmit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) 
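/* Parse a Fast Open cookie option into *foc. On return, foc->len is the
 * cookie length for a well-formed cookie, 0 for an empty option (a cookie
 * request), or -1 when the option carried a cookie of invalid length.
 */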
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) 				      bool syn, struct tcp_fastopen_cookie *foc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) 				      bool exp_opt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) 	/* Valid only in SYN or SYN-ACK with an even length.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) 	if (!foc || !syn || len < 0 || (len & 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) 	if (len >= TCP_FASTOPEN_COOKIE_MIN &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) 	    len <= TCP_FASTOPEN_COOKIE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) 		memcpy(foc->val, cookie, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) 	else if (len != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) 		len = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) 	foc->len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) 	foc->exp = exp_opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) static bool smc_parse_options(const struct tcphdr *th,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) 			      struct tcp_options_received *opt_rx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) 			      const unsigned char *ptr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) 			      int opsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) 	if (static_branch_unlikely(&tcp_have_smc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) 		if (th->syn && !(opsize & 1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) 		    opsize >= TCPOLEN_EXP_SMC_BASE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) 		    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) 			opt_rx->smc_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) /* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912)  * value on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) 	const unsigned char *ptr = (const unsigned char *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) 	int length = (th->doff * 4) - sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) 	u16 mss = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) 	while (length > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) 		int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) 		int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) 		switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) 		case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) 			return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) 			length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) 			if (length < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) 				return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) 			opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) 			if (opsize < 2) /* "silly options" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) 				return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) 			if (opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) 				return mss;	/* fail on partial options */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) 			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) 				u16 in_mss = get_unaligned_be16(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) 				if (in_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) 					if (user_mss && user_mss < in_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) 						in_mss = user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) 					mss = in_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) 			ptr += opsize - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) 			length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) 	return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) /* Look for tcp options. Normally only called on SYN and SYNACK packets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955)  * But, this can also be called on packets in the established flow when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956)  * the fast version below fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) void tcp_parse_options(const struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) 		       const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) 		       struct tcp_options_received *opt_rx, int estab,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) 		       struct tcp_fastopen_cookie *foc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) 	const unsigned char *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) 	const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) 	int length = (th->doff * 4) - sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) 	ptr = (const unsigned char *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) 	opt_rx->saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) 	opt_rx->saw_unknown = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) 	while (length > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) 		int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) 		int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) 		switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) 		case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) 			length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) 			if (length < 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) 			opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) 			if (opsize < 2) /* "silly options" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) 			if (opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) 				return;	/* don't parse partial options */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) 			switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) 			case TCPOPT_MSS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) 				if (opsize == TCPOLEN_MSS && th->syn && !estab) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) 					u16 in_mss = get_unaligned_be16(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) 					if (in_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) 						if (opt_rx->user_mss &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) 						    opt_rx->user_mss < in_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) 							in_mss = opt_rx->user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) 						opt_rx->mss_clamp = in_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) 					}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) 			case TCPOPT_WINDOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) 				if (opsize == TCPOLEN_WINDOW && th->syn &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) 				    !estab && net->ipv4.sysctl_tcp_window_scaling) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) 					__u8 snd_wscale = *(__u8 *)ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) 					opt_rx->wscale_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) 					if (snd_wscale > TCP_MAX_WSCALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) 						net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) 								     __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) 								     snd_wscale,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) 								     TCP_MAX_WSCALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) 						snd_wscale = TCP_MAX_WSCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) 					}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) 					opt_rx->snd_wscale = snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) 			case TCPOPT_TIMESTAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) 				if ((opsize == TCPOLEN_TIMESTAMP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) 				    ((estab && opt_rx->tstamp_ok) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) 				     (!estab && net->ipv4.sysctl_tcp_timestamps))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) 					opt_rx->saw_tstamp = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) 			case TCPOPT_SACK_PERM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) 				    !estab && net->ipv4.sysctl_tcp_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) 					opt_rx->sack_ok = TCP_SACK_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) 					tcp_sack_reset(opt_rx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) 			case TCPOPT_SACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) 				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) 				   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) 				   opt_rx->sack_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) 					TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) 			case TCPOPT_MD5SIG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) 				 * The MD5 Hash has already been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) 				 * checked (see tcp_v{4,6}_do_rcv()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) 			case TCPOPT_FASTOPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) 				tcp_parse_fastopen_option(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) 					opsize - TCPOLEN_FASTOPEN_BASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) 					ptr, th->syn, foc, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) 			case TCPOPT_EXP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) 				/* The Fast Open option shares option kind 254,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) 				 * using a 16-bit magic number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) 				if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) 				    get_unaligned_be16(ptr) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) 				    TCPOPT_FASTOPEN_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) 					tcp_parse_fastopen_option(opsize -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) 						TCPOLEN_EXP_FASTOPEN_BASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) 						ptr + 2, th->syn, foc, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) 				if (smc_parse_options(th, opt_rx, ptr, opsize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) 				opt_rx->saw_unknown = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) 			default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) 				opt_rx->saw_unknown = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) 			ptr += opsize-2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) 			length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) EXPORT_SYMBOL(tcp_parse_options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) 
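/* Recognize the common pre-aligned timestamp layout, i.e. an options area
 * that begins with exactly:
 *
 *	NOP(1), NOP(1), TIMESTAMP(8), length 10, then TSval and TSecr
 *
 * which is what the single 32-bit htonl() comparison below encodes.
 */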
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) 	const __be32 *ptr = (const __be32 *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) 	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) 			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) 		tp->rx_opt.saw_tstamp = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) 		++ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) 		tp->rx_opt.rcv_tsval = ntohl(*ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) 		++ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) 		if (*ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) 			tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) 			tp->rx_opt.rcv_tsecr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) /* Fast parse options. This hopes to only see timestamps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103)  * If it is wrong it falls back on tcp_parse_options().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) static bool tcp_fast_parse_options(const struct net *net,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) 				   const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) 				   const struct tcphdr *th, struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) 	/* In the spirit of fast parsing, compare doff directly to constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) 	 * values.  Because equality is used, short doff can be ignored here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) 	if (th->doff == (sizeof(*th) / 4)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) 		tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) 	} else if (tp->rx_opt.tstamp_ok &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) 		   th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) 		if (tcp_parse_aligned_timestamp(tp, th))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) 	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130)  * Parse MD5 Signature option
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) 	int length = (th->doff << 2) - sizeof(*th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) 	const u8 *ptr = (const u8 *)(th + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) 	/* If there is not enough data remaining, we can short-cut the scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) 	while (length >= TCPOLEN_MD5SIG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) 		int opcode = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) 		int opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) 		switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) 		case TCPOPT_EOL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) 			return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) 		case TCPOPT_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) 			length--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) 			opsize = *ptr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) 			if (opsize < 2 || opsize > length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) 				return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) 			if (opcode == TCPOPT_MD5SIG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) 				return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) 		ptr += opsize - 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) 		length -= opsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) EXPORT_SYMBOL(tcp_parse_md5sig_option);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165)  * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166)  * it can pass through the stack. So, the following predicate verifies that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167)  * this segment is not used for anything but congestion avoidance or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168)  * fast retransmit. Moreover, we are even able to eliminate most such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169)  * second-order effects, if we apply a small "replay" window (~RTO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170)  * to the timestamp space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172)  * All these measures still do not guarantee that we reject wrapped ACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173)  * on high-bandwidth networks where the sequence space is recycled quickly,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174)  * but they do guarantee that such events will be very rare and will not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175)  * seriously affect the connection. This doesn't look nice, but alas, PAWS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176)  * really is a buggy extension.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178)  * [ Later note. Even worse! It is buggy for segments _with_ data. The RFC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179)  * states that cases where a retransmit arrives after the original data are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180)  * rare. That is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181)  * the biggest problem on large power networks even with minor reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182)  * OK, let's give it a small replay window. If the peer clock ticks even at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183)  * 1 Hz, this is safe up to a bandwidth of 18 Gbit/sec. 8) ]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184)  */
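/* One way to read the 18 Gbit/sec figure above (our arithmetic, not the
 * original author's): 2^32 bytes of sequence space is about 3.4e10 bits, so
 * at 18 Gbit/sec it takes just under two seconds to wrap; a peer timestamp
 * clock ticking at even 1 Hz is therefore guaranteed to advance at least
 * once before old sequence numbers can reappear.
 */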
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) 	const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) 	u32 seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) 	return (/* 1. Pure ACK with correct sequence number. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) 		(th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) 		/* 2. ... and duplicate ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) 		ack == tp->snd_una &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) 		/* 3. ... and does not update window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) 		!tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) 		/* 4. ... and sits in replay window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) 		(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) 
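/* PAWS check for established sockets: the segment should be discarded when
 * its timestamp is older than ts_recent by more than TCP_PAWS_WINDOW, unless
 * it looks like a harmless duplicate ACK inside the replay window (see
 * tcp_disordered_ack() above).
 */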
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) static inline bool tcp_paws_discard(const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) 				   const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) 	return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) 	       !tcp_disordered_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) /* Check segment sequence number for validity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217)  * Segment controls are considered valid if the segment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218)  * fits into the window after truncation to the window. Acceptability
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219)  * of data (and SYN, FIN, of course) is checked separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220)  * See tcp_data_queue(), for example.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222)  * Also, controls (RST being the main one) are accepted using RCV.WUP instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223)  * of RCV.NXT. The peer may not have advanced its SND.UNA yet when we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224)  * delayed our ACK, so that its SND.UNA <= our RCV.WUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225)  * (borrowed from freebsd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) 	return	!before(end_seq, tp->rcv_wup) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) 		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) /* When we get a reset we do this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) void tcp_reset(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) 	trace_tcp_receive_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) 	/* We want the right error as BSD sees it (and indeed as we do). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) 	case TCP_SYN_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) 		sk->sk_err = ECONNREFUSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) 	case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) 		sk->sk_err = EPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) 	case TCP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) 		sk->sk_err = ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) 	/* This barrier is coupled with smp_rmb() in tcp_poll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) 	smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) 	tcp_write_queue_purge(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) 	tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) 	if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) 		sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263)  * 	Process the FIN bit. This now behaves as it is supposed to work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264)  *	and the FIN takes effect only once it is validly part of the sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265)  *	space, not before, i.e. not while there are still holes in front of it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267)  *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268)  *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269)  *	TIME-WAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271)  *	If we are in FINWAIT-1, a received FIN indicates simultaneous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)  *	close and we go into CLOSING (and later onto TIME-WAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274)  *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) void tcp_fin(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) 	inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) 	sk->sk_shutdown |= RCV_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) 	sock_set_flag(sk, SOCK_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) 	case TCP_SYN_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) 	case TCP_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) 		/* Move to CLOSE_WAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) 		tcp_set_state(sk, TCP_CLOSE_WAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) 		inet_csk_enter_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) 	case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) 	case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) 		/* Received a retransmission of the FIN, do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) 		 * nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) 	case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) 		/* RFC793: Remain in the LAST-ACK state. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) 	case TCP_FIN_WAIT1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) 		/* This case occurs when a simultaneous close
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) 		 * happens; we must ACK the received FIN and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) 		 * enter the CLOSING state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) 		tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) 		tcp_set_state(sk, TCP_CLOSING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) 	case TCP_FIN_WAIT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) 		/* Received a FIN -- send ACK and enter TIME_WAIT. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) 		tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) 		tcp_time_wait(sk, TCP_TIME_WAIT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) 		/* Only TCP_LISTEN and TCP_CLOSE are left; in these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) 		 * cases we should never reach this piece of code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) 		pr_err("%s: Impossible, sk->sk_state=%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) 		       __func__, sk->sk_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) 	/* It _is_ possible that we have something out-of-order _after_ the FIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) 	 * Probably we should reset in this case; for now, drop such data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) 	skb_rbtree_purge(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) 	if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) 		tcp_sack_reset(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) 	sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) 	if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) 		sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) 		/* Do not send POLL_HUP for half duplex close. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) 		if (sk->sk_shutdown == SHUTDOWN_MASK ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) 		    sk->sk_state == TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) 			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) 			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) 
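/* Extend the SACK block *sp so that it also covers [seq, end_seq] when the
 * two ranges overlap or touch; return true if they were merged.
 */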
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) static inline bool tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) 				  u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) 	if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) 		if (before(seq, sp->start_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) 			sp->start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) 		if (after(end_seq, sp->end_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) 			sp->end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) 
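/* Record a D-SACK block for the duplicate data in [seq, end_seq) so that the
 * next outgoing ACK reports it to the sender (RFC 2883), and count it in the
 * MIBs as either an old or an out-of-order duplicate.
 */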
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) 	if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) 		int mib_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) 		if (before(seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) 			mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) 			mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) 		NET_INC_STATS(sock_net(sk), mib_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) 		tp->rx_opt.dsack = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) 		tp->duplicate_sack[0].start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) 		tp->duplicate_sack[0].end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) 	if (!tp->rx_opt.dsack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) 		tcp_dsack_set(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) 	/* When the ACK path fails or drops most ACKs, the sender would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) 	 * timeout and spuriously retransmit the same segment repeatedly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) 	 * The receiver remembers and reflects via DSACKs. Leverage the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) 	 * DSACK state and change the txhash to re-route speculatively.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) 	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) 	    sk_rethink_txhash(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) 		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) 			tcp_rcv_spurious_retrans(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) 				end_seq = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) 			tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) 	tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) /* These routines update the SACK block as out-of-order packets arrive or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423)  * in-order packets close up the sequence space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424)  */
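/* Hypothetical example: if the first block has just grown to [100,300] and
 * the next one is [200,400], the loop below merges them into a single
 * [100,400] block and decreases num_sacks by one.
 */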
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) 	int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) 	struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) 	struct tcp_sack_block *swalk = sp + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) 	/* See if the recent change to the first SACK eats into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) 	 * or hits the sequence space of other SACK blocks; if so, coalesce them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) 	for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) 		if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) 			int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) 			/* Zap SWALK, by moving every further SACK up by one slot.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) 			 * Decrease num_sacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) 			tp->rx_opt.num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) 			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) 				sp[i] = sp[i + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) 		this_sack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) 		swalk++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) 
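/* Flush a pending compressed ACK: cancel the compression hrtimer if it is
 * still armed (dropping the reference it held on the socket), credit the
 * ACKs that were batched to LINUX_MIB_TCPACKCOMPRESSED, and send one ACK
 * right away.
 */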
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) static void tcp_sack_compress_send_ack(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) 	if (!tp->compressed_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) 	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) 		__sock_put(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) 	/* Since we have to send one ACK eventually,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) 	 * subtract one from tp->compressed_ack to keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) 	 * LINUX_MIB_TCPACKCOMPRESSED accurate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) 		      tp->compressed_ack - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) 	tp->compressed_ack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) 	tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) /* Reasonable number of SACK blocks included in the TCP SACK option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473)  * The max is 4, but this becomes 3 if TCP timestamps are present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474)  * Given that SACK packets might be lost, be conservative and use 2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475)  */
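/* The arithmetic behind the numbers above: TCP options are limited to
 * 40 bytes and a SACK option needs 2 + 8 * n bytes (plus alignment
 * padding), so four blocks fit on their own, while the ~12 bytes used by
 * timestamps leave room for only three.
 */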
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) #define TCP_SACK_BLOCKS_EXPECTED 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) 
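/* Record the out-of-order range [seq, end_seq) in the SACK table: extend an
 * adjacent existing block if possible, otherwise insert a fresh block at the
 * head of selective_acks[] (evicting the last one if all TCP_NUM_SACKS slots
 * are used).  When the range lands beyond the first two blocks, any pending
 * compressed ACK is flushed first.
 */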
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) 	struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) 	int cur_sacks = tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) 	int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) 	if (!cur_sacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) 		goto new_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) 	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) 		if (tcp_sack_extend(sp, seq, end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) 			if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) 				tcp_sack_compress_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) 			/* Rotate this_sack to the first one. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) 			for (; this_sack > 0; this_sack--, sp--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) 				swap(*sp, *(sp - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) 			if (cur_sacks > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) 				tcp_sack_maybe_coalesce(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) 	if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) 		tcp_sack_compress_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) 	/* Could not find an adjacent existing SACK, build a new one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) 	 * put it at the front, and shift everyone else down.  We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) 	 * always know there is at least one SACK present already here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) 	 * If the sack array is full, forget about the last one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) 	if (this_sack >= TCP_NUM_SACKS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) 		this_sack--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) 		tp->rx_opt.num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) 		sp--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) 	for (; this_sack > 0; this_sack--, sp--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) 		*sp = *(sp - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) new_sack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) 	/* Build the new head SACK, and we're done. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) 	sp->start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) 	sp->end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) 	tp->rx_opt.num_sacks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) /* RCV.NXT advances, some SACKs should be eaten. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) static void tcp_sack_remove(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) 	struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) 	int num_sacks = tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) 	int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) 	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) 		tp->rx_opt.num_sacks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) 	for (this_sack = 0; this_sack < num_sacks;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) 		/* Check if the start of the sack is covered by RCV.NXT. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) 		if (!before(tp->rcv_nxt, sp->start_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) 			int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) 			/* RCV.NXT must cover the whole block! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) 			WARN_ON(before(tp->rcv_nxt, sp->end_seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) 			/* Zap this SACK by moving any later SACKs forward. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) 			for (i = this_sack+1; i < num_sacks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) 				tp->selective_acks[i-1] = tp->selective_acks[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) 			num_sacks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) 		this_sack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) 		sp++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) 	tp->rx_opt.num_sacks = num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560)  * tcp_try_coalesce - try to merge skb to prior one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561)  * @sk: socket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562)  * @to: prior buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563)  * @from: buffer to add in queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564)  * @fragstolen: pointer to boolean
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566)  * Before queueing skb @from after @to, try to merge them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567)  * to reduce overall memory use and queue lengths, if cost is small.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568)  * Packets in ofo or receive queues can stay a long time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569)  * Better try to coalesce them right now to avoid future collapses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570)  * Returns true if caller should free @from instead of queueing it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) static bool tcp_try_coalesce(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) 			     struct sk_buff *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) 			     struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) 			     bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) 	int delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) 	*fragstolen = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) 	/* It's possible this segment overlaps with the prior segment in the queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) 	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) 	if (!mptcp_skb_can_collapse(to, from))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) 	if (from->decrypted != to->decrypted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) 	if (!skb_try_coalesce(to, from, fragstolen, &delta))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) 	atomic_add(delta, &sk->sk_rmem_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) 	sk_mem_charge(sk, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) 	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) 	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) 	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) 	if (TCP_SKB_CB(from)->has_rxtstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) 		TCP_SKB_CB(to)->has_rxtstamp = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) 		to->tstamp = from->tstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) 		skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) 
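/* Like tcp_try_coalesce(), but also folds @from's gso_segs count into @to so
 * that drop accounting stays correct if the merged skb is later dropped.
 */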
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) static bool tcp_ooo_try_coalesce(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) 			     struct sk_buff *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) 			     struct sk_buff *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) 			     bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) 	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) 	/* In case tcp_drop() is called later, update to->gso_segs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) 	if (res) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) 		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) 			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) 		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) 	return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) 
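/* Account the dropped segment in sk_drops and free it.  The
 * trace_android_vh_kfree_skb() call is an Android vendor hook that lets
 * vendor modules observe the drop first.
 */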
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) static void tcp_drop(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) 	trace_android_vh_kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) 	sk_drops_add(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) 	__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) /* This one checks to see if we can put data from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637)  * out_of_order queue into the receive_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) static void tcp_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) 	__u32 dsack_high = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) 	bool fin, fragstolen, eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) 	struct sk_buff *skb, *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) 	struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) 	p = rb_first(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) 	while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) 		skb = rb_to_skb(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) 		if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) 			__u32 dsack = dsack_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) 			if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) 				dsack_high = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) 			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) 		p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) 			tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) 		tail = skb_peek_tail(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) 		if (!eaten)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) 			__skb_queue_tail(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) 			kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) 		if (unlikely(fin)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) 			tcp_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) 			/* tcp_fin() purges tp->out_of_order_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) 			 * so we must end this loop right now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) static bool tcp_prune_ofo_queue(struct sock *sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) static int tcp_prune_queue(struct sock *sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) 
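/* Make sure @size bytes can be charged to the socket's receive memory.  If
 * the socket is over sk_rcvbuf or the charge fails, prune the receive queue
 * and then, while still short on memory, the out-of-order queue.  Returns 0
 * on success, -1 if no room could be made.
 */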
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) 				 unsigned int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) 	    !sk_rmem_schedule(sk, skb, size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) 		if (tcp_prune_queue(sk) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) 		while (!sk_rmem_schedule(sk, skb, size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) 			if (!tcp_prune_ofo_queue(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) 				return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) 
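/* Queue an out-of-order segment into the ooo rbtree, which is ordered by
 * start sequence.  Overlapping segments are coalesced, replaced or dropped
 * as appropriate, D-SACK information is generated for duplicate ranges, and
 * the SACK blocks advertised to the sender are updated.
 */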
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) 	struct rb_node **p, *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) 	struct sk_buff *skb1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) 	u32 seq, end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) 	bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) 	tcp_ecn_check_ce(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) 		sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) 		tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) 	/* Disable header prediction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) 	tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) 	inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) 	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) 	seq = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) 	end_seq = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) 	p = &tp->out_of_order_queue.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) 		/* Initial out of order segment, build 1 SACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) 		if (tcp_is_sack(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) 			tp->rx_opt.num_sacks = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) 			tp->selective_acks[0].start_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) 			tp->selective_acks[0].end_seq = end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) 		rb_link_node(&skb->rbnode, NULL, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) 		rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) 		tp->ooo_last_skb = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) 		goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) 	/* In the typical case, we are adding an skb to the end of the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) 	 * Use of ooo_last_skb avoids the O(log N) rbtree lookup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) 	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) 				 skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) coalesce_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) 		/* For non-SACK flows, do not grow the window, to force DUPACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) 		 * and trigger fast retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) 		if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) 			tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) 		kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) 		skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) 		goto add_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) 	/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) 	if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) 		parent = &tp->ooo_last_skb->rbnode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) 		p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) 		goto insert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) 	/* Find place to insert this segment. Handle overlaps on the way. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) 	parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) 	while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) 		parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) 		skb1 = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) 			p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) 		if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) 				/* All the bits are present. Drop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) 				NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) 					      LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) 				tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) 				skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) 				tcp_dsack_set(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) 				goto add_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) 			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) 				/* Partial overlap. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) 				tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) 				/* skb's seq == skb1's seq and skb covers skb1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) 				 * Replace skb1 with skb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) 				rb_replace_node(&skb1->rbnode, &skb->rbnode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) 						&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) 				tcp_dsack_extend(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) 						 TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) 						 TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) 				NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) 					      LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) 				tcp_drop(sk, skb1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) 				goto merge_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) 		} else if (tcp_ooo_try_coalesce(sk, skb1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) 						skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) 			goto coalesce_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) 		p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) insert:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) 	/* Insert segment into RB tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) 	rb_link_node(&skb->rbnode, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) 	rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) merge_right:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) 	/* Remove other segments covered by skb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) 	while ((skb1 = skb_rb_next(skb)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) 		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) 			tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) 					 end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) 		rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) 		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) 				 TCP_SKB_CB(skb1)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) 		tcp_drop(sk, skb1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) 	/* If there is no skb after us, we are the last_skb! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) 	if (!skb1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) 		tp->ooo_last_skb = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) add_sack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) 	if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) 		tcp_sack_new_ofo_skb(sk, seq, end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) 		/* For non-SACK flows, do not grow the window, to force DUPACKs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) 		 * and trigger fast retransmit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) 		if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) 			tcp_grow_window(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) 		skb_condense(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) 		skb_set_owner_r(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) 
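/* Append an in-order skb to sk_receive_queue, coalescing it into the queue
 * tail when possible.  Returns non-zero if the data was merged and the
 * caller must free the skb itself instead of leaving it queued.
 */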
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) 				      bool *fragstolen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) 	int eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) 	struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) 	eaten = (tail &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) 		 tcp_try_coalesce(sk, tail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) 				  skb, fragstolen)) ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) 	if (!eaten) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) 		__skb_queue_tail(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) 		skb_set_owner_r(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) 	return eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) 
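/* Build an skb from @msg and feed it into the receive queue as if it had
 * arrived in sequence at rcv_nxt (used, for instance, on the TCP repair
 * path).  Returns the number of bytes queued or a negative error.
 */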
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) 	int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) 	int data_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) 	bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) 	if (size == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) 	if (size > PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) 		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) 		data_len = npages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) 		size = data_len + (size & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) 	skb = alloc_skb_with_frags(size - data_len, data_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) 				   PAGE_ALLOC_COSTLY_ORDER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) 				   &err, sk->sk_allocation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) 	if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) 	skb_put(skb, size - data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) 	skb->data_len = data_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) 	skb->len = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) 		goto err_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) 		goto err_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) 	TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) 	if (tcp_queue_rcv(sk, skb, &fragstolen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) 		WARN_ON_ONCE(fragstolen); /* should not happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) 		__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) 	return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) err_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) 	kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) 
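/* Wake up the reader only when it is worth doing so: if fewer than
 * SO_RCVLOWAT bytes are available, there is no receive-memory pressure, the
 * connection is still open and the receive window is not nearly closed,
 * defer the wakeup and keep aggregating data.
 */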
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) void tcp_data_ready(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) 	int avail = tp->rcv_nxt - tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) 	if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) 	    !sock_flag(sk, SOCK_DONE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) 	    tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) 	sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) 
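/* Main entry point for queueing received data: in-sequence, in-window
 * segments go onto sk_receive_queue (coalesced when possible), segments
 * that only repeat old data trigger a D-SACK and an immediate duplicate
 * ACK, and segments beyond rcv_nxt are handed to tcp_data_queue_ofo().
 */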
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) 	bool fragstolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) 	int eaten;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) 	if (sk_is_mptcp(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) 		mptcp_incoming_options(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) 		__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) 	skb_dst_drop(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) 	__skb_pull(skb, tcp_hdr(skb)->doff * 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) 	tp->rx_opt.dsack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) 	/*  Queue data for delivery to the user.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) 	 *  Packets in sequence go to the receive queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) 	 *  Out-of-sequence packets go to the out_of_order_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) 	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) 		if (tcp_receive_window(tp) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) 			goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) 		/* Ok. In sequence. In window. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) queue_and_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) 		if (skb_queue_len(&sk->sk_receive_queue) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) 			sk_forced_mem_schedule(sk, skb->truesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) 		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) 			sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) 			goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) 		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) 		if (skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) 			tcp_event_data_recv(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) 			tcp_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) 		if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) 			tcp_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) 			/* RFC 5681, section 4.2: SHOULD send an immediate ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) 			 * when a gap in the queue is filled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) 			if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) 				inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) 		if (tp->rx_opt.num_sacks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) 			tcp_sack_remove(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) 		tcp_fast_path_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) 		if (eaten > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) 			kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) 		if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) 			tcp_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) 	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) 		tcp_rcv_spurious_retrans(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) 		/* A retransmit, the 2nd most common case. Force an immediate ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) out_of_window:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) 		inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) drop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) 		tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) 	/* Out of window. E.g. a zero window probe. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) 	if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) 		goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) 		/* Partial packet: seq < rcv_nxt < end_seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) 		/* If the window is closed, drop the tail of the packet, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) 		 * after remembering the D-SACK for its head, set on the previous line.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) 		if (!tcp_receive_window(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) 			goto out_of_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) 		goto queue_and_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) 	tcp_data_queue_ofo(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) 
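/* Return the skb that follows @skb, either in the linked receive queue
 * @list or, when @list is NULL, in the out-of-order rbtree.
 */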
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) 	if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) 	return skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) 
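/* Unlink and free @skb while collapsing a queue (the receive queue when
 * @list is set, otherwise the rbtree @root) and return the next skb.
 */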
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) 					struct sk_buff_head *list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) 					struct rb_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) 	struct sk_buff *next = tcp_skb_next(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) 	if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) 		__skb_unlink(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) 		rb_erase(&skb->rbnode, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) 	__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) 	return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) 	struct rb_node **p = &root->rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) 	struct rb_node *parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) 	struct sk_buff *skb1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) 	while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) 		parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) 		skb1 = rb_to_skb(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) 			p = &parent->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) 			p = &parent->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) 	rb_link_node(&skb->rbnode, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) 	rb_insert_color(&skb->rbnode, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) /* Collapse contiguous sequence of skbs head..tail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078)  * sequence numbers start..end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080)  * If tail is NULL, this means until the end of the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082)  * Segments with FIN/SYN are not collapsed (only because this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083)  * simplifies code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) 	     struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) 	struct sk_buff *skb = head, *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) 	struct sk_buff_head tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) 	bool end_of_skbs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) 	/* First, check that queue is collapsible and find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) 	 * the point where collapsing can be useful.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) 	for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) 		n = tcp_skb_next(skb, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) 		/* No new bits? It is possible on ofo queue. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) 			skb = tcp_collapse_one(sk, skb, list, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) 			if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) 			goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) 		/* The first skb to collapse is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) 		 * - not SYN/FIN and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) 		 * - bloated or contains data before "start" or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) 		 *   overlaps the next one, and MPTCP allows collapsing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) 		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) 		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) 		     before(TCP_SKB_CB(skb)->seq, start))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) 			end_of_skbs = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) 		if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) 		    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) 			end_of_skbs = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) 		/* Decided to skip this, advance start seq. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) 		start = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) 	if (end_of_skbs ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) 	    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) 	__skb_queue_head_init(&tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) 	while (before(start, end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) 		int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) 		struct sk_buff *nskb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) 		nskb = alloc_skb(copy, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) 		if (!nskb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) 		nskb->decrypted = skb->decrypted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) 		if (list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) 			__skb_queue_before(list, skb, nskb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) 			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) 		skb_set_owner_r(nskb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) 		mptcp_skb_ext_move(nskb, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) 		/* Copy data, releasing collapsed skbs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) 		while (copy > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) 			int offset = start - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) 			int size = TCP_SKB_CB(skb)->end_seq - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) 			BUG_ON(offset < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) 			if (size > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) 				size = min(copy, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) 				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) 					BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) 				TCP_SKB_CB(nskb)->end_seq += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) 				copy -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) 				start += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) 			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) 				skb = tcp_collapse_one(sk, skb, list, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) 				if (!skb ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) 				    skb == tail ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) 				    !mptcp_skb_can_collapse(nskb, skb) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) 				    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) 					goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) 				if (skb->decrypted != nskb->decrypted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) 					goto end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) 	skb_queue_walk_safe(&tmp, skb, n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) 		tcp_rbtree_insert(root, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) /* Collapse the ofo queue. Algorithm: select a contiguous sequence of skbs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189)  * and tcp_collapse() them until the whole queue is collapsed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) static void tcp_collapse_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) 	u32 range_truesize, sum_tiny = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) 	struct sk_buff *skb, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) 	u32 start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) 	skb = skb_rb_first(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) new_range:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) 	if (!skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) 		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) 	start = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) 	end = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) 	range_truesize = skb->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) 	for (head = skb;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) 		skb = skb_rb_next(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) 		/* Range is terminated when we see a gap or when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) 		 * we are at the queue end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) 		if (!skb ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) 		    after(TCP_SKB_CB(skb)->seq, end) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) 			/* Do not attempt collapsing tiny skbs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) 			if (range_truesize != head->truesize ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) 			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) 				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) 					     head, skb, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) 				sum_tiny += range_truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) 				if (sum_tiny > sk->sk_rcvbuf >> 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) 					return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) 			goto new_range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) 		range_truesize += skb->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) 			start = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) 		if (after(TCP_SKB_CB(skb)->end_seq, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) 			end = TCP_SKB_CB(skb)->end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239)  * Clean the out-of-order queue to make room.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240)  * We drop packets with high sequence numbers in order to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241)  * 1) Give holes a chance to be filled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242)  * 2) Avoid adding large latencies if thousands of packets sit there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243)  *    (But if the application shrinks SO_RCVBUF, we could still end up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244)  *     freeing the whole queue here.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245)  * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247)  * Return true if queue has shrunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) static bool tcp_prune_ofo_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) 	struct rb_node *node, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) 	int goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) 	goal = sk->sk_rcvbuf >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) 	node = &tp->ooo_last_skb->rbnode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) 		prev = rb_prev(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) 		rb_erase(node, &tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) 		goal -= rb_to_skb(node)->truesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) 		tcp_drop(sk, rb_to_skb(node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) 		if (!prev || goal <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) 			sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) 			    !tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) 			goal = sk->sk_rcvbuf >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) 		node = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) 	} while (node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) 	tp->ooo_last_skb = rb_to_skb(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) 	/* Reset SACK state.  A conforming SACK implementation will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) 	 * do the same at a timeout based retransmit.  When a connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) 	 * is in a sad state like this, we care only about integrity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) 	 * of the connection, not performance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) 	if (tp->rx_opt.sack_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) 		tcp_sack_reset(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) }
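/* Editor's note, a worked example of the pruning goal used above, assuming
 * a hypothetical sk_rcvbuf of 4 MB: goal = sk_rcvbuf >> 3 = 512 KB, i.e. at
 * least 12.5% of the receive buffer is dropped per reclaim round, starting
 * at ooo_last_skb (the highest sequences) and walking backwards.  If the
 * socket is still over its limit (or under memory pressure) once the goal
 * is reached, the goal is re-armed and pruning continues.
 */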
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) /* Reduce allocated memory if we can, trying to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288)  * the socket within its memory limits again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290)  * Return less than zero if we should start dropping frames
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291)  * until the socket owning process reads some of the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292)  * to stabilize the situation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) static int tcp_prune_queue(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) 		tcp_clamp_window(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) 	else if (tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) 	tcp_collapse_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) 	if (!skb_queue_empty(&sk->sk_receive_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) 			     skb_peek(&sk->sk_receive_queue),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) 			     NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) 			     tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) 	sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) 	/* Collapsing did not help, destructive actions follow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) 	 * This should never occur. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) 	tcp_prune_ofo_queue(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) 	/* If we are really being abused, tell the caller to silently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) 	 * drop receive data on the floor.  It will get retransmitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) 	 * and hopefully then we'll have sufficient space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) 	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) 	/* Massive buffer overcommit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) 	tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) }
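/* Editor's note: tcp_prune_queue() above escalates roughly in this order:
 * clamp the advertised window (or shrink rcv_ssthresh under memory
 * pressure), collapse the out-of-order queue, collapse the receive queue,
 * and finally prune the out-of-order queue outright; only if sk_rmem_alloc
 * is still above sk_rcvbuf after all that does it return -1 and let the
 * caller drop the incoming data.
 */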
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) static bool tcp_should_expand_sndbuf(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) 	const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) 	/* If the user specified a specific send buffer setting, do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) 	 * not modify it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) 	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) 	/* If we are under global TCP memory pressure, do not expand.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) 	if (tcp_under_memory_pressure(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) 	/* If we are under soft global TCP memory pressure, do not expand.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) 	if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) 	/* If we filled the congestion window, do not expand.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) 	if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) static void tcp_new_space(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) 	if (tcp_should_expand_sndbuf(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) 		tcp_sndbuf_expand(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) 		tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) 	sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) static void tcp_check_space(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) 	/* pairs with tcp_poll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) 	if (sk->sk_socket &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) 	    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) 		tcp_new_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) 		if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) 			tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) }
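/* Editor's note (an assumption about the intent of the barrier above): the
 * smp_mb() in tcp_check_space() orders the freeing of queue space against
 * the read of SOCK_NOSPACE, pairing with tcp_poll(), which sets
 * SOCK_NOSPACE before re-checking for free space; without such pairing a
 * writer blocked in poll() could miss the wakeup.
 */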
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) static inline void tcp_data_snd_check(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) 	tcp_push_pending_frames(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) 	tcp_check_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394)  * Check if sending an ack is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) 	unsigned long rtt, delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) 	    /* More than one full frame received... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) 	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) 	     /* ... and right edge of window advances far enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) 	      * (tcp_recvmsg() will send ACK otherwise).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) 	      * If the application uses SO_RCVLOWAT, we want to send an ACK now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) 	      * if we have not yet received enough bytes to satisfy the condition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) 	      */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) 	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) 	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) 	    /* We ACK each frame or... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) 	    tcp_in_quickack_mode(sk) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) 	    /* Protocol state mandates a one-time immediate ACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) 	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) send_now:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) 		tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) 	if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) 		tcp_send_delayed_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) 	if (!tcp_is_sack(tp) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) 	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) 		goto send_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) 	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) 		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) 		tp->dup_ack_counter = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) 	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) 		tp->dup_ack_counter++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) 		goto send_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) 	}
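	/* Editor's note: the effect of the check above is that the first
	 * TCP_FASTRETRANS_THRESH ACKs for a given rcv_nxt are still sent
	 * immediately (so the peer's fast retransmit is not delayed); only
	 * ACKs after that are candidates for the compressed-ACK timer below.
	 */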
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) 	tp->compressed_ack++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) 	/* Compressed ACK timer: 5% of RTT, but no more than tcp_comp_sack_delay_ns */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) 
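	/* Editor's note on units: rcv_rtt_est.rtt_us and srtt_us are stored
	 * left-shifted by 3 (i.e. in units of usec/8), so the expression
	 * rtt * (NSEC_PER_USEC >> 3) / 20 below works out to
	 * rtt_in_usec * 1000 / 20 ns, i.e. 5% of the RTT in nanoseconds.
	 * Example: a 10 ms RTT gives at most a 500 us delay, before the
	 * sysctl_tcp_comp_sack_delay_ns cap is applied by the min_t().
	 */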
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) 	rtt = tp->rcv_rtt_est.rtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) 	if (tp->srtt_us && tp->srtt_us < rtt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) 		rtt = tp->srtt_us;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) 	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) 		      rtt * (NSEC_PER_USEC >> 3)/20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) 	sock_hold(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) 	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) 			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) 			       HRTIMER_MODE_REL_PINNED_SOFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) static inline void tcp_ack_snd_check(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) 	if (!inet_csk_ack_scheduled(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) 		/* We sent a data segment already. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) 	__tcp_ack_snd_check(sk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464)  *	This routine is only called when we have urgent data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465)  *	signaled. It's the 'slow' part of tcp_urg. It could be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466)  *	moved inline now, as tcp_urg is only called from one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467)  *	place. We deliberately handle URGent data the 'wrong' way, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468)  *	BSD still doesn't use the correction from RFC 961.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469)  *	For 1003.1g we should support a new option TCP_STDURG to permit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470)  *	either form (or just set the sysctl tcp_stdurg).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471)  */
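/* Editor's note (an interpretation, following the tcp_stdurg sysctl
 * documentation): with tcp_stdurg off (the default, BSD interpretation) the
 * urgent pointer points just past the urgent byte, hence the "ptr--" below
 * to land on the urgent byte itself; with tcp_stdurg on, the pointer is
 * taken to already point at the last urgent byte, as the host-requirements
 * RFC specifies.
 */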
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) 	u32 ptr = ntohs(th->urg_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) 	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) 		ptr--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) 	ptr += ntohl(th->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) 	/* Ignore urgent data that we've already seen and read. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) 	if (after(tp->copied_seq, ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) 	/* Do not replay urg ptr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) 	 * NOTE: interesting situation not covered by specs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) 	 * Misbehaving sender may send urg ptr, pointing to segment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) 	 * which we already have in ofo queue. We are not able to fetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) 	 * such data and will stay in TCP_URG_NOTYET until it is consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) 	 * by recvmsg(). It seems we are not obliged to handle such wicked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) 	 * situations. But it is worth thinking about the possibility of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) 	 * DoS built on some hypothetical application-level deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) 	if (before(ptr, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) 	/* Do we already have a newer (or duplicate) urgent pointer? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) 	if (tp->urg_data && !after(ptr, tp->urg_seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) 	/* Tell the world about our new urgent pointer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) 	sk_send_sigurg(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) 	/* We may be adding urgent data when the last byte read was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) 	 * urgent. To do this requires some care. We cannot just ignore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) 	 * tp->copied_seq since we would read the last urgent byte again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) 	 * as data, nor can we alter copied_seq until this data arrives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) 	 * or we break the semantics of SIOCATMARK (and thus sockatmark())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) 	 * NOTE: in plain English, the author of the comment above did something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) 	 * like send("A", MSG_OOB); send("B", MSG_OOB); and expected both A and B
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) 	 * to disappear from the stream. This is _wrong_. Though this happens in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) 	 * BSD with high probability, it is not guaranteed. Any application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) 	 * relying on this is buggy. Note also that the fix "works" only in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) 	 * artificial test; insert some normal data between A and B and we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) 	 * again diverge from BSD. Verdict: it is better to remove it and trap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) 	 * buggy users.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) 	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) 	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) 		tp->copied_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) 		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) 			__skb_unlink(skb, &sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) 			__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) 	tp->urg_data = TCP_URG_NOTYET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) 	WRITE_ONCE(tp->urg_seq, ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) 	/* Disable header prediction. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) 	tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) /* This is the 'fast' part of urgent handling. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) 	/* Check if we get a new urgent pointer - normally not. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) 	if (th->urg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) 		tcp_check_urg(sk, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) 	/* Do we wait for any urgent data? - normally not... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) 	if (tp->urg_data == TCP_URG_NOTYET) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) 		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) 			  th->syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) 
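		/* Editor's note: ptr is the offset of the urgent byte within
		 * this skb, counted from the TCP header: the payload starts
		 * at sequence (seq + syn) and at offset doff * 4 in the skb,
		 * so ptr = (urg_seq - (seq + syn)) + doff * 4.
		 */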
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) 		/* Is the urgent pointer pointing into this packet? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) 		if (ptr < skb->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) 			u8 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) 			if (skb_copy_bits(skb, ptr, &tmp, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) 				BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) 			tp->urg_data = TCP_URG_VALID | tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) 			if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) 				sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) /* Accept RST for rcv_nxt - 1 after a FIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565)  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566)  * FIN is sent followed by a RST packet. The RST is sent with the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567)  * sequence number as the FIN, and thus according to RFC 5961 a challenge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568)  * ACK should be sent. However, Mac OSX rate limits replies to challenge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569)  * ACKs on the closed socket. In addition middleboxes can drop either the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570)  * challenge ACK or a subsequent RST.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) static bool tcp_reset_check(const struct sock *sk, const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) 	return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) 			(1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) 					       TCPF_CLOSING));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) /* Performs PAWS and sequence-number based validation of an incoming segment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582)  * the TCP flags play a significant role here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) 				  const struct tcphdr *th, int syn_inerr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) 	bool rst_seq_match = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) 	/* RFC1323: H1. Apply PAWS check first. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) 	if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) 	    tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) 	    tcp_paws_discard(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) 		if (!th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) 			if (!tcp_oow_rate_limited(sock_net(sk), skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) 						  LINUX_MIB_TCPACKSKIPPEDPAWS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) 						  &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) 				tcp_send_dupack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) 		/* Reset is accepted even if it did not pass PAWS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) 	/* Step 1: check sequence number */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) 	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) 		/* RFC793, page 37: "In all states except SYN-SENT, all reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) 		 * (RST) segments are validated by checking their SEQ-fields."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) 		 * And page 69: "If an incoming segment is not acceptable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) 		 * an acknowledgment should be sent in reply (unless the RST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) 		 * bit is set, if so drop the segment and return)".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) 		if (!th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) 			if (th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) 				goto syn_challenge;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) 			if (!tcp_oow_rate_limited(sock_net(sk), skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) 						  LINUX_MIB_TCPACKSKIPPEDSEQ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) 						  &tp->last_oow_ack_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) 				tcp_send_dupack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) 		} else if (tcp_reset_check(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) 			tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) 	/* Step 2: check RST bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) 	if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) 		/* RFC 5961 3.2 (extend to match against (RCV.NXT - 1) after a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) 		 * FIN and SACK too if available):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) 		 * If seq num matches RCV.NXT or (RCV.NXT - 1) after a FIN, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) 		 * the right-most SACK block,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) 		 * then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) 		 *     RESET the connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) 		 * else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) 		 *     Send a challenge ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) 		if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) 		    tcp_reset_check(sk, skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) 			rst_seq_match = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) 		} else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) 			struct tcp_sack_block *sp = &tp->selective_acks[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) 			int max_sack = sp[0].end_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) 			int this_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) 			for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) 			     ++this_sack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) 				max_sack = after(sp[this_sack].end_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) 						 max_sack) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) 					sp[this_sack].end_seq : max_sack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) 			if (TCP_SKB_CB(skb)->seq == max_sack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) 				rst_seq_match = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) 		}
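		/* Editor's note, a hypothetical example of the SACK rule
		 * above: with SACK blocks [1000,2000) and [3000,4000),
		 * max_sack ends up as 4000, so an RST carrying seq 4000
		 * (the right edge of the right-most block) also counts as
		 * an exact match and resets the connection.
		 */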
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) 		if (rst_seq_match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) 			tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) 			/* Disable TFO if RST is out-of-order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) 			 * and no data has been received
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) 			 * for current active TFO socket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) 			if (tp->syn_fastopen && !tp->data_segs_in &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) 			    sk->sk_state == TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) 				tcp_fastopen_active_disable(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) 			tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) 	/* step 3: check security and precedence [ignored] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) 	/* step 4: Check for a SYN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) 	 * RFC 5961 4.2 : Send a challenge ack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) 	if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) syn_challenge:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) 		if (syn_inerr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) 			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) 		tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) 	bpf_skops_parse_hdr(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) 	tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695)  *	TCP receive function for the ESTABLISHED state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697)  *	It is split into a fast path and a slow path. The fast path is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698)  * 	disabled when:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699)  *	- A zero window was announced from us - zero window probing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700)  *        is only handled properly in the slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701)  *	- Out of order segments arrived.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702)  *	- Urgent data is expected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703)  *	- There is no buffer space left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704)  *	- Unexpected TCP flags/window values/header lengths are received
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705)  *	  (detected by checking the TCP header against pred_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706)  *	- Data is sent in both directions. Fast path only supports pure senders
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707)  *	  or pure receivers (this means either the sequence number or the ack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708)  *	  value must stay constant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709)  *	- Unexpected TCP option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711)  *	When these conditions are not satisfied it drops into a standard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712)  *	receive procedure patterned after RFC793 to handle all cases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713)  *	The first three cases are guaranteed by proper pred_flags setting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714)  *	the rest is checked inline. Fast processing is turned on in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715)  *	tcp_data_queue when everything is OK.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) 	const struct tcphdr *th = (const struct tcphdr *)skb->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) 	unsigned int len = skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) 	/* TCP congestion window tracking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) 	trace_tcp_probe(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) 	tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) 	if (unlikely(!sk->sk_rx_dst))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) 		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) 	 *	Header prediction.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) 	 *	The code loosely follows the one in the famous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) 	 *	"30 instruction TCP receive" Van Jacobson mail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) 	 *	Van's trick is to deposit buffers into socket queue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) 	 *	on a device interrupt, to call tcp_recv function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) 	 *	on the receive process context and checksum and copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) 	 *	the buffer to user space. smart...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) 	 *	Our current scheme is not silly either but we take the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) 	 *	extra cost of the net_bh soft interrupt processing...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) 	 *	We do checksum and copy also but from device to kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) 	tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) 	 *	if header_prediction is to be made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) 	 *	'S' will always be tp->tcp_header_len >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) 	 *	turn it off (when there are holes in the receive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) 	 *	space for instance).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) 	 *	PSH flag is ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) 	 */
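	/* Editor's note, a hypothetical example: with timestamps enabled
	 * (tcp_header_len = 32, so doff = 8) and an unscaled send window of
	 * 65535, pred_flags is 0x8010ffff: doff in the top nibble, the ACK
	 * bit, and the window in the low 16 bits.  An incoming segment whose
	 * flags/window word matches this exactly, together with the seq/ack
	 * checks below, takes the fast path.
	 */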
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) 	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) 	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) 	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) 		int tcp_header_len = tp->tcp_header_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) 		/* Timestamp header prediction: tcp_header_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) 		 * is automatically equal to th->doff*4 due to pred_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) 		 * match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) 		/* Check timestamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) 		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) 			/* No? Slow path! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) 			if (!tcp_parse_aligned_timestamp(tp, th))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) 				goto slow_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) 			/* If PAWS failed, check it more carefully in slow path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) 			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) 				goto slow_path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) 			/* DO NOT update ts_recent here: if the checksum fails
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) 			 * and the timestamp was part of the corrupted data, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) 			 * would result in a hung connection since we would drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) 			 * all future packets due to the PAWS test.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) 		if (len <= tcp_header_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) 			/* Bulk data transfer: sender */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) 			if (len == tcp_header_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) 				/* Predicted packet is in window by definition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) 				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) 				 * Hence, check seq<=rcv_wup reduces to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) 				if (tcp_header_len ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) 				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) 				    tp->rcv_nxt == tp->rcv_wup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) 					tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) 				/* We know that such packets are checksummed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) 				 * on entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) 				tcp_ack(sk, skb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) 				__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) 				tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) 				/* When receiving pure ack in fast path, update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) 				 * last ts ecr directly instead of calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) 				 * tcp_rcv_rtt_measure_ts()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) 				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) 			} else { /* Header too small */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) 				goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) 			int eaten = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) 			bool fragstolen = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) 			if (tcp_checksum_complete(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) 				goto csum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) 			if ((int)skb->truesize > sk->sk_forward_alloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) 				goto step5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) 			/* Predicted packet is in window by definition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) 			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) 			 * Hence, check seq<=rcv_wup reduces to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) 			if (tcp_header_len ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) 			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) 			    tp->rcv_nxt == tp->rcv_wup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) 				tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) 			tcp_rcv_rtt_measure_ts(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) 			/* Bulk data transfer: receiver */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) 			__skb_pull(skb, tcp_header_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) 			eaten = tcp_queue_rcv(sk, skb, &fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) 			tcp_event_data_recv(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) 			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) 				/* Well, only one small jumplet in fast path... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) 				tcp_ack(sk, skb, FLAG_DATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) 				tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) 				if (!inet_csk_ack_scheduled(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) 					goto no_ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) 				tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) 			__tcp_ack_snd_check(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) no_ack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) 			if (eaten)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) 				kfree_skb_partial(skb, fragstolen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) 			tcp_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) slow_path:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) 		goto csum_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) 	if (!th->ack && !th->rst && !th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) 	 *	Standard slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) 	if (!tcp_validate_incoming(sk, skb, th, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) step5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) 	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) 	tcp_rcv_rtt_measure_ts(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) 	/* Process urgent data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) 	tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) 	/* step 7: process the segment text */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) 	tcp_data_queue(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) 	tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) 	tcp_ack_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) csum_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) 	tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) EXPORT_SYMBOL(tcp_rcv_established);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) 	tcp_mtup_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) 	icsk->icsk_af_ops->rebuild_header(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) 	tcp_init_metrics(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) 	/* Initialize the congestion window to start the transfer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) 	 * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) 	 * retransmitted. In light of RFC 6298's more aggressive 1 sec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) 	 * initRTO, we only reset cwnd when more than one SYN/SYN-ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) 	 * retransmission has occurred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) 	if (tp->total_retrans > 1 && tp->undo_marker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) 		tp->snd_cwnd = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) 		tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) 	tp->snd_cwnd_stamp = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) 	bpf_skops_established(sk, bpf_op, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) 	/* Initialize congestion control unless BPF initialized it already: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) 	if (!icsk->icsk_ca_initialized)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) 		tcp_init_congestion_control(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) 	tcp_init_buffer_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) 	tcp_set_state(sk, TCP_ESTABLISHED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) 	icsk->icsk_ack.lrcvtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) 		security_inet_conn_established(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) 		sk_mark_napi_id(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) 	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) 	/* Prevent spurious tcp_cwnd_restart() on first data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) 	 * packet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) 	tp->lsndtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) 	if (sock_flag(sk, SOCK_KEEPOPEN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) 	if (!tp->rx_opt.snd_wscale)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) 		__tcp_fast_path_on(tp, tp->snd_wnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) 		tp->pred_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) 				    struct tcp_fastopen_cookie *cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) 	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) 	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) 	bool syn_drop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) 	if (mss == tp->rx_opt.user_mss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) 		struct tcp_options_received opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) 		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) 		tcp_clear_options(&opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) 		opt.user_mss = opt.mss_clamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) 		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) 		mss = opt.mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) 	if (!tp->syn_fastopen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) 		/* Ignore an unsolicited cookie */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) 		cookie->len = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) 	} else if (tp->total_retrans) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) 		/* SYN timed out and the SYN-ACK neither has a cookie nor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) 		 * acknowledges data. Presumably the remote received only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) 		 * the retransmitted (regular) SYNs: either the original
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) 		 * SYN-data or the corresponding SYN-ACK was dropped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) 		syn_drop = (cookie->len < 0 && data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) 	} else if (cookie->len < 0 && !tp->syn_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) 		/* We requested a cookie but didn't get one. If we did not use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) 		 * the (old) experimental option format, try it next time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) 		 * (try_exp=1); otherwise go back to the RFC7413 option (try_exp=2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) 		try_exp = tp->syn_fastopen_exp ? 2 : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) 	if (data) { /* Retransmit unacked data in SYN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) 		if (tp->total_retrans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) 			tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) 			tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) 		skb_rbtree_walk_from(data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) 			if (__tcp_retransmit_skb(sk, data, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) 		tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) 		NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) 				LINUX_MIB_TCPFASTOPENACTIVEFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) 	tp->syn_data_acked = tp->syn_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) 	if (tp->syn_data_acked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) 		/* SYN-data is counted as two separate packets in tcp_ack() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) 		if (tp->delivered > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) 			--tp->delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) 	tcp_fastopen_add_skb(sk, synack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) }
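/*
 * Illustration: a client reaches the path above by sending data together
 * with its SYN via TCP Fast Open. A minimal userspace sketch (not kernel
 * code); it assumes the client bit of net.ipv4.tcp_fastopen is enabled and
 * that a TFO-capable listener runs on 127.0.0.1:8080:
 *
 *	#include <netinet/in.h>
 *	#include <sys/socket.h>
 *
 *	static ssize_t tfo_send(const void *buf, size_t len)
 *	{
 *		struct sockaddr_in dst = {
 *			.sin_family = AF_INET,
 *			.sin_port   = htons(8080),
 *			.sin_addr   = { htonl(INADDR_LOOPBACK) },
 *		};
 *		int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		// MSG_FASTOPEN carries @buf on the SYN; if the server does
 *		// not ACK that data (the failure cases handled above), the
 *		// kernel falls back to a regular 3WHS and retransmits it.
 *		return sendto(fd, buf, len, MSG_FASTOPEN,
 *			      (struct sockaddr *)&dst, sizeof(dst));
 *	}
 */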
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) static void smc_check_reset_syn(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) 	if (static_branch_unlikely(&tcp_have_smc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) 		if (tp->syn_smc && !tp->rx_opt.smc_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) 			tp->syn_smc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) static void tcp_try_undo_spurious_syn(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) 	u32 syn_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) 	/* undo_marker is set when SYN or SYNACK times out. The timeout is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) 	 * spurious if the ACK's timestamp option echo value matches the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) 	 * original SYN timestamp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) 	syn_stamp = tp->retrans_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) 	if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) 	    syn_stamp == tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) 		tp->undo_marker = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) }
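/*
 * Illustration: a worked example of the check above, using made-up
 * timestamp values. The client sends a SYN carrying TSval=1000, the RTO
 * fires, and the SYN is retransmitted with TSval=2000 (undo_marker and
 * retrans_stamp are now set). If the SYN-ACK that eventually arrives
 * echoes TSecr=1000, it acknowledges the original SYN, so the timeout was
 * spurious and undo_marker is cleared; if it echoes 2000, the
 * retransmission was genuinely needed and the marker is left in place.
 */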
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) 					 const struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) 	struct tcp_fastopen_cookie foc = { .len = -1 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) 	int saved_clamp = tp->rx_opt.mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) 	bool fastopen_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) 	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) 	if (th->ack) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) 		/* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) 		 * "If the state is SYN-SENT then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) 		 *    first check the ACK bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) 		 *      If the ACK bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) 		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) 		 *        a reset (unless the RST bit is set, if so drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) 		 *        the segment and return)"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) 		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) 		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) 			/* Previous FIN/ACK or RST/ACK might be ignored. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) 			if (icsk->icsk_retransmits == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) 				inet_csk_reset_xmit_timer(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) 						ICSK_TIME_RETRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) 						TCP_TIMEOUT_MIN, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) 			goto reset_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) 		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) 		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) 			     tcp_time_stamp(tp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) 			NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) 					LINUX_MIB_PAWSACTIVEREJECTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) 			goto reset_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) 		/* Now ACK is acceptable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) 		 * "If the RST bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) 		 *    If the ACK was acceptable then signal the user "error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) 		 *    connection reset", drop the segment, enter CLOSED state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) 		 *    delete TCB, and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) 		if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) 			tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) 		/* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) 		 *   "fifth, if neither of the SYN or RST bits is set then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) 		 *    drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) 		 *    See note below!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) 		 *                                        --ANK(990513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) 		if (!th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) 			goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) 		/* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) 		 *   "If the SYN bit is on ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) 		 *    are acceptable then ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) 		 *    (our SYN has been ACKed), change the connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) 		 *    state to ESTABLISHED..."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) 		tcp_ecn_rcv_synack(tp, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) 		tcp_try_undo_spurious_syn(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) 		tcp_ack(sk, skb, FLAG_SLOWPATH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) 		/* Ok.. it's good. Set up sequence numbers and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) 		 * move to established.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) 		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) 		/* RFC1323: The window in SYN & SYN/ACK segments is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) 		 * never scaled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) 		tp->snd_wnd = ntohs(th->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) 		if (!tp->rx_opt.wscale_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) 			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) 			tp->window_clamp = min(tp->window_clamp, 65535U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) 		if (tp->rx_opt.saw_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) 			tp->rx_opt.tstamp_ok	   = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) 			tp->tcp_header_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) 				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) 			tp->advmss	    -= TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) 			tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) 			tp->tcp_header_len = sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) 		tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) 		/* Remember, tcp_poll() does not lock the socket!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) 		 * Change state from SYN-SENT only after copied_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) 		 * is initialized. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) 		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) 		smc_check_reset_syn(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) 		smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) 		tcp_finish_connect(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) 		fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) 				tcp_rcv_fastopen_synack(sk, skb, &foc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) 		if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) 			sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) 			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) 		if (fastopen_fail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) 		if (sk->sk_write_pending ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) 		    icsk->icsk_accept_queue.rskq_defer_accept ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) 		    inet_csk_in_pingpong_mode(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) 			/* Save one ACK. Data will be ready after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) 			 * several ticks, if write_pending is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) 			 * It may be deleted, but with this feature tcpdumps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) 			 * look so _wonderfully_ clever that I could not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) 			 * resist the temptation 8)     --ANK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) 			inet_csk_schedule_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) 						  TCP_DELACK_MAX, TCP_RTO_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) 			tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) 			tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) 	/* No ACK in the segment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) 	if (th->rst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) 		/* rfc793:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) 		 * "If the RST bit is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) 		 *      Otherwise (no ACK) drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) 		goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) 	/* PAWS check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) 	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) 	    tcp_paws_reject(&tp->rx_opt, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) 		goto discard_and_undo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) 	if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) 		/* We see a SYN without an ACK. It is an attempt at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) 		 * simultaneous connect with crossed SYNs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) 		 * In particular, it can be a connect to self.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) 		tcp_set_state(sk, TCP_SYN_RECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) 		if (tp->rx_opt.saw_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) 			tp->rx_opt.tstamp_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) 			tcp_store_ts_recent(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) 			tp->tcp_header_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) 				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) 			tp->tcp_header_len = sizeof(struct tcphdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) 		WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) 		WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) 		/* RFC1323: The window in SYN & SYN/ACK segments is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) 		 * never scaled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) 		tp->snd_wnd    = ntohs(th->window);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) 		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) 		tp->max_window = tp->snd_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) 		tcp_ecn_rcv_syn(tp, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) 		tcp_mtup_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) 		tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) 		tcp_send_synack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) #if 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) 		/* Note: we could accept data and URG from this segment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) 		 * There is no obstacle to doing so (except that we must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) 		 * either change tcp_recvmsg() to prevent it from returning data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) 		 * before the 3WHS completes per RFC793, or employ TCP Fast Open).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) 		 * However, if we sometimes ignore data in ACKless segments,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) 		 * there is no reason to accept it at other times.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) 		 * Also, the code doing this in step6 of tcp_rcv_state_process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) 		 * does not seem flawless. So, discard the packet for sanity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) 		 * Uncomment this return to process the data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) 	/* "fifth, if neither of the SYN or RST bits is set then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) 	 * drop the segment and return."
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) discard_and_undo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) 	tcp_clear_options(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) 	tp->rx_opt.mss_clamp = saved_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) 	goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) reset_and_undo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) 	tcp_clear_options(&tp->rx_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) 	tp->rx_opt.mss_clamp = saved_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) }
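/*
 * Illustration: the SYN-SENT handling above is what completes a
 * non-blocking connect(). A minimal userspace sketch (not kernel code) of
 * how that completion is observed; @fd is assumed to be a non-blocking TCP
 * socket on which connect() has just returned -1/EINPROGRESS:
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *
 *	static int wait_connected(int fd, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *		int err = 0;
 *		socklen_t len = sizeof(err);
 *
 *		// tcp_finish_connect() plus the sk_state_change()/POLL_OUT
 *		// wakeup above make the socket writable once the SYN-ACK is
 *		// accepted.
 *		if (poll(&pfd, 1, timeout_ms) != 1)
 *			return -ETIMEDOUT;
 *		if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) < 0)
 *			return -errno;
 *		return err ? -err : 0;	// 0: handshake completed
 *	}
 */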
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) 	struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) 	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) 	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) 	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) 		tcp_try_undo_loss(sk, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) 	/* Reset rtx states to prevent spurious retransmits_timed_out() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) 	tcp_sk(sk)->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) 	inet_csk(sk)->icsk_retransmits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) 	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) 	 * we no longer need req, so release it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) 	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) 					lockdep_sock_is_held(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) 	reqsk_fastopen_remove(sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) 	/* Re-arm the timer because data may have been sent out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) 	 * This is similar to the regular data transmission case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) 	 * when new data has just been ack'ed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) 	 * (TFO) - we could try to be more aggressive and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) 	 * retransmit any data sooner based on when it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) 	 * was sent out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) 	tcp_rearm_rto(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) }
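/*
 * Illustration: the fastopen request released above only exists when the
 * listener enabled TCP Fast Open. A minimal userspace sketch (not kernel
 * code); @lfd is assumed to be an already bound TCP socket:
 *
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <sys/socket.h>
 *
 *	static int enable_tfo_listener(int lfd)
 *	{
 *		int qlen = 16;	// max number of pending TFO requests
 *
 *		// With TCP_FASTOPEN set, a SYN carrying a valid cookie may
 *		// deliver data before the 3WHS completes; the function above
 *		// then runs once the handshake finishes.
 *		if (setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
 *			return -1;
 *		return listen(lfd, 128);
 *	}
 */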
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310)  *	This function implements the receiving procedure of RFC 793 for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311)  *	all states except ESTABLISHED and TIME_WAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312)  *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313)  *	address independent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) 	struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) 	const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) 	struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) 	int queued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) 	bool acceptable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) 	case TCP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) 	case TCP_LISTEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) 		if (th->ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) 		if (th->rst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) 		if (th->syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) 			if (th->fin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) 				goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) 			/* It is possible that we process SYN packets from the backlog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) 			 * so we need to take the RCU read lock and disable BHs right here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) 			rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) 			local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) 			acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) 			local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) 			if (!acceptable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) 				return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) 			consume_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) 	case TCP_SYN_SENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) 		tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) 		tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) 		queued = tcp_rcv_synsent_state_process(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) 		if (queued >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) 			return queued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) 		/* Do step6 onward by hand. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) 		tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) 		__kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) 		tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) 	tcp_mstamp_refresh(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) 	tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) 	req = rcu_dereference_protected(tp->fastopen_rsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) 					lockdep_sock_is_held(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) 	if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) 		bool req_stolen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) 		    sk->sk_state != TCP_FIN_WAIT1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) 		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) 	if (!th->ack && !th->rst && !th->syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) 	if (!tcp_validate_incoming(sk, skb, th, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) 	/* step 5: check the ACK field */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) 	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) 				      FLAG_UPDATE_TS_RECENT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) 				      FLAG_NO_CHALLENGE_ACK) > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) 	if (!acceptable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) 		if (sk->sk_state == TCP_SYN_RECV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) 			return 1;	/* send one RST */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) 		tcp_send_challenge_ack(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) 		goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) 	case TCP_SYN_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) 		tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) 		if (!tp->srtt_us)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) 			tcp_synack_rtt_meas(sk, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) 		if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) 			tcp_rcv_synrecv_state_fastopen(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) 			tcp_try_undo_spurious_syn(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) 			tp->retrans_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) 			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) 					  skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) 			WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) 		smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) 		tcp_set_state(sk, TCP_ESTABLISHED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) 		sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) 		/* Note that this wakeup is only for the marginal crossed-SYN case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) 		 * Passively opened sockets are not woken up, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) 		 * sk->sk_sleep == NULL and sk->sk_socket == NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) 		if (sk->sk_socket)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) 			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) 		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) 		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) 		if (tp->rx_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) 			tcp_update_pacing_rate(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) 		tp->lsndtime = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) 		tcp_initialize_rcv_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) 		tcp_fast_path_on(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) 	case TCP_FIN_WAIT1: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) 		int tmo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) 		if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) 			tcp_rcv_synrecv_state_fastopen(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) 		if (tp->snd_una != tp->write_seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) 		tcp_set_state(sk, TCP_FIN_WAIT2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) 		sk->sk_shutdown |= SEND_SHUTDOWN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) 		sk_dst_confirm(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) 		if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) 			/* Wake up lingering close() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) 			sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) 		if (tp->linger2 < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) 			tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) 		if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) 		    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) 			/* Receive out of order FIN after close() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) 			if (tp->syn_fastopen && th->fin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) 				tcp_fastopen_active_disable(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) 			tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) 		tmo = tcp_fin_time(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) 		if (tmo > TCP_TIMEWAIT_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) 			inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) 		} else if (th->fin || sock_owned_by_user(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) 			/* Bad case. We could lose such a FIN otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) 			 * It is not a big problem, but it looks confusing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) 			 * and is not such a rare event. We can still lose it now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) 			 * if it spins in bh_lock_sock(), but that is really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) 			 * a marginal case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) 			inet_csk_reset_keepalive_timer(sk, tmo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) 			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) 	case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) 		if (tp->snd_una == tp->write_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) 	case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) 		if (tp->snd_una == tp->write_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) 			tcp_update_metrics(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) 			tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) 			goto discard;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) 	/* step 6: check the URG bit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) 	tcp_urg(sk, skb, th);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) 	/* step 7: process the segment text */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) 	switch (sk->sk_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) 	case TCP_CLOSE_WAIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) 	case TCP_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) 	case TCP_LAST_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) 		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) 			if (sk_is_mptcp(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) 				mptcp_incoming_options(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) 	case TCP_FIN_WAIT1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) 	case TCP_FIN_WAIT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) 		/* RFC 793 says to queue data in these states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) 		 * RFC 1122 says we MUST send a reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) 		 * BSD 4.4 also sends a reset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) 				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) 				tcp_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) 				return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) 	case TCP_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) 		tcp_data_queue(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) 		queued = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) 	/* tcp_data could move socket to TIME-WAIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) 	if (sk->sk_state != TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) 		tcp_data_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) 		tcp_ack_snd_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) 	if (!queued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) discard:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) 		tcp_drop(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) EXPORT_SYMBOL(tcp_rcv_state_process);
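/*
 * Illustration: a non-zero return from tcp_rcv_state_process() means "the
 * segment was not acceptable, answer with a reset". A sketch of the typical
 * call-site pattern, simplified from the IPv4 receive path (not a literal
 * copy of tcp_v4_do_rcv()):
 *
 *	if (tcp_rcv_state_process(sk, skb)) {
 *		// e.g. a bad ACK while in SYN-RECV, or data arriving after
 *		// the connection has been torn down.
 *		tcp_v4_send_reset(sk, skb);
 *		goto discard;
 *	}
 *	// 0: the segment was consumed (queued, or dropped internally).
 *	return 0;
 */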
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) 	struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) 	if (family == AF_INET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) 		net_dbg_ratelimited("drop open request from %pI4/%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) 				    &ireq->ir_rmt_addr, port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) 	else if (family == AF_INET6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) 		net_dbg_ratelimited("drop open request from %pI6/%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) 				    &ireq->ir_v6_rmt_addr, port);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) /* RFC3168, 6.1.1: SYN packets must not have ECT/ECN bits set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577)  * If we receive a SYN packet with these bits set, it means the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578)  * network is playing bad games with the TOS bits. In order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579)  * avoid possible false congestion notifications, we disable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580)  * TCP ECN negotiation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582)  * Exception: tcp_ca wants ECN. This is required for DCTCP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583)  * congestion control: Linux DCTCP asserts ECT on all packets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584)  * including the SYN, which is the optimal solution; however,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585)  * others, such as FreeBSD, do not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587)  * Exception: At least one of the reserved bits of the TCP header (th->res1) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588)  * set, indicating the use of a future TCP extension (such as AccECN). See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589)  * RFC8311 §4.3 which updates RFC3168 to allow the development of such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590)  * extensions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) static void tcp_ecn_create_request(struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) 				   const struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) 				   const struct sock *listen_sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) 				   const struct dst_entry *dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) 	const struct tcphdr *th = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) 	const struct net *net = sock_net(listen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) 	bool th_ecn = th->ece && th->cwr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) 	bool ect, ecn_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) 	u32 ecn_ok_dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) 	if (!th_ecn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) 	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) 	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) 	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) 	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) 	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) 	    tcp_bpf_ca_needs_ecn((struct sock *)req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) 		inet_rsk(req)->ecn_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) }
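/*
 * Illustration: the negotiation rule above, restated as a stand-alone
 * predicate. This is only a simplified sketch (the DST_FEATURE_ECN_CA and
 * BPF cases are folded into @ca_wants_ecn); no such helper exists here:
 *
 *	static bool syn_may_negotiate_ecn(bool th_ecn, bool ect, bool res1,
 *					  bool ecn_ok, bool ca_wants_ecn)
 *	{
 *		if (!th_ecn)			// SYN did not request ECN
 *			return false;
 *		// A plain RFC3168 SYN must not be ECT; tolerate ECT only
 *		// when a reserved bit hints at an extension such as AccECN.
 *		if ((!ect || res1) && ecn_ok)
 *			return true;
 *		return ca_wants_ecn;		// e.g. DCTCP
 *	}
 */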
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) static void tcp_openreq_init(struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) 			     const struct tcp_options_received *rx_opt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) 			     struct sk_buff *skb, const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) 	struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) 	req->rsk_rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) 	tcp_rsk(req)->snt_synack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) 	tcp_rsk(req)->last_oow_ack_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) 	req->mss = rx_opt->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) 	ireq->tstamp_ok = rx_opt->tstamp_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) 	ireq->sack_ok = rx_opt->sack_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) 	ireq->snd_wscale = rx_opt->snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) 	ireq->wscale_ok = rx_opt->wscale_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) 	ireq->acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) 	ireq->ecn_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) 	ireq->ir_rmt_port = tcp_hdr(skb)->source;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) 	ireq->ir_mark = inet_request_mark(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) #if IS_ENABLED(CONFIG_SMC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) 	ireq->smc_ok = rx_opt->smc_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) 				      struct sock *sk_listener,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) 				      bool attach_listener)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) 	struct request_sock *req = reqsk_alloc(ops, sk_listener,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) 					       attach_listener);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) 	if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) 		struct inet_request_sock *ireq = inet_rsk(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) 		ireq->ireq_opt = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) #if IS_ENABLED(CONFIG_IPV6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) 		ireq->pktopts = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) 		atomic64_set(&ireq->ir_cookie, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) 		ireq->ireq_state = TCP_NEW_SYN_RECV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) 		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) 		ireq->ireq_family = sk_listener->sk_family;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) 	return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) EXPORT_SYMBOL(inet_reqsk_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668)  * Return true if a syncookie should be sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) 	const char *msg = "Dropping request";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) 	bool want_cookie = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) 	struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) #ifdef CONFIG_SYN_COOKIES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) 	if (net->ipv4.sysctl_tcp_syncookies) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) 		msg = "Sending cookies";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) 		want_cookie = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) 	if (!queue->synflood_warned &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) 	    net->ipv4.sysctl_tcp_syncookies != 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) 	    xchg(&queue->synflood_warned, 1) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) 		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) 				     proto, sk->sk_num, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) 	return want_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) }
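/*
 * Illustration: whether cookies are sent here is governed by the
 * net.ipv4.tcp_syncookies sysctl (0 = never, 1 = only when the request
 * queue overflows, 2 = always). A minimal userspace sketch (not kernel
 * code) selecting the overflow-only mode:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int enable_syncookies(void)
 *	{
 *		int fd = open("/proc/sys/net/ipv4/tcp_syncookies", O_WRONLY);
 *		int ret = -1;
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "1\n", 2) == 2)
 *			ret = 0;
 *		close(fd);
 *		return ret;
 *	}
 */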
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) static void tcp_reqsk_record_syn(const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) 				 struct request_sock *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) 				 const struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) 	if (tcp_sk(sk)->save_syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) 		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) 		struct saved_syn *saved_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) 		u32 mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) 		void *base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) 		if (tcp_sk(sk)->save_syn == 2) {  /* Save full header. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) 			base = skb_mac_header(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) 			mac_hdrlen = skb_mac_header_len(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) 			len += mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) 			base = skb_network_header(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) 			mac_hdrlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) 		saved_syn = kmalloc(struct_size(saved_syn, data, len),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) 				    GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) 		if (saved_syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) 			saved_syn->mac_hdrlen = mac_hdrlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) 			saved_syn->network_hdrlen = skb_network_header_len(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) 			saved_syn->tcp_hdrlen = tcp_hdrlen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) 			memcpy(saved_syn->data, base, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) 			req->saved_syn = saved_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) }
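
/* Usage sketch (illustrative, not from the original source): tp->save_syn is
 * set from user space with the TCP_SAVE_SYN socket option on the listening
 * socket, and the headers copied above are read back from the accepted socket
 * with TCP_SAVED_SYN.  A minimal user-space fragment, assuming the libc
 * headers expose both options:
 *
 *	int one = 1;
 *	char buf[512];
 *	socklen_t len = sizeof(buf);
 *
 *	setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN, &one, sizeof(one));
 *	// ... accept() a connection as accepted_fd ...
 *	getsockopt(accepted_fd, IPPROTO_TCP, TCP_SAVED_SYN, buf, &len);
 *	// buf now holds the saved network + TCP headers of the client's SYN
 *
 * The save_syn == 2 case (also keep the MAC header) appears to be the
 * BPF-settable variant of the same option.
 */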
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) /* If a SYN cookie is required and supported, returns a clamped MSS value to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727)  * used for SYN cookie generation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730) 			  const struct tcp_request_sock_ops *af_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) 			  struct sock *sk, struct tcphdr *th)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) 	u16 mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) 	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) 	    !inet_csk_reqsk_queue_is_full(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) 	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) 	if (sk_acceptq_is_full(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) 	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) 	if (!mss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) 		mss = af_ops->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) 	return mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
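
/* Worked example (illustrative): if the incoming SYN carries no MSS option
 * and the listener has no user_mss configured, the helper above falls back to
 * af_ops->mss_clamp, i.e. the address family's conservative default (536
 * bytes for IPv4, the historical TCP default MSS), so a non-zero return value
 * is always something that can be safely encoded into a SYN cookie.
 */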
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) int tcp_conn_request(struct request_sock_ops *rsk_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) 		     const struct tcp_request_sock_ops *af_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) 		     struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) 	struct tcp_fastopen_cookie foc = { .len = -1 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) 	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) 	struct tcp_options_received tmp_opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) 	struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) 	struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) 	struct sock *fastopen_sk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) 	struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) 	bool want_cookie = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) 	struct dst_entry *dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) 	struct flowi fl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) 	/* TW buckets are converted to open requests without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) 	 * limitation: they conserve resources and the peer is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) 	 * evidently a real one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) 	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) 	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) 		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) 		if (!want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779) 			goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) 	if (sk_acceptq_is_full(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) 		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) 		goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) 	req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) 	if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) 		goto drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) 	req->syncookie = want_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) 	tcp_rsk(req)->af_specific = af_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) 	tcp_rsk(req)->ts_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) #if IS_ENABLED(CONFIG_MPTCP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) 	tcp_rsk(req)->is_mptcp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) 	tcp_clear_options(&tmp_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) 	tmp_opt.mss_clamp = af_ops->mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) 	tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) 			  want_cookie ? NULL : &foc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) 	if (want_cookie && !tmp_opt.saw_tstamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) 		tcp_clear_options(&tmp_opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) 	if (IS_ENABLED(CONFIG_SMC) && want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) 		tmp_opt.smc_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) 	tcp_openreq_init(req, &tmp_opt, skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812) 	inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) 	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) 	inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) 	af_ops->init_req(req, sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) 	if (security_inet_conn_request(sk, skb, req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820) 		goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) 	if (tmp_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) 		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) 	dst = af_ops->route_req(sk, &fl, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) 	if (!dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) 		goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) 	if (!want_cookie && !isn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) 		/* Kill the following clause if you dislike this heuristic. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) 		if (!net->ipv4.sysctl_tcp_syncookies &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) 		    (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) 		     (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) 		    !tcp_peer_is_proven(req, dst)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) 			/* Without syncookies, the last quarter of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) 			 * backlog is reserved for destinations that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) 			 * proven to be alive, i.e. under a synflood we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) 			 * keep talking only to peers we had already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) 			 * remembered before the flood started.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) 			 * (Worked example after this block.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) 			pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) 				    rsk_ops->family);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) 			goto drop_and_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) 		}
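
		/* Worked example for the check above (illustrative): with
		 * net.ipv4.tcp_max_syn_backlog set to 1024, once fewer than
		 * 256 request slots remain (1024 - queue_len < 1024 >> 2),
		 * SYNs from peers without a proven-alive metrics entry are
		 * dropped here rather than being allowed to consume the last
		 * quarter of the backlog.
		 */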
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) 		isn = af_ops->init_seq(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) 	tcp_ecn_create_request(req, skb, sk, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) 	if (want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) 		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) 		if (!tmp_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) 			inet_rsk(req)->ecn_ok = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) 	}
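
	/* When a cookie is used, the ISN chosen above encodes the connection
	 * tuple and a compressed MSS, so no request-socket state needs to be
	 * kept: the reqsk is freed right after the SYNACK goes out (see the
	 * want_cookie branch below) and is reconstructed later from the
	 * returning ACK.
	 */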
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) 	tcp_rsk(req)->snt_isn = isn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) 	tcp_rsk(req)->txhash = net_tx_rndhash();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) 	tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) 	tcp_openreq_init_rwin(req, sk, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) 	sk_rx_queue_set(req_to_sk(req), skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) 	if (!want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) 		tcp_reqsk_record_syn(sk, req, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) 	}
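	/* TCP Fast Open (illustrative note): tcp_try_fastopen() returns a
	 * fully established child socket when the SYN carried a valid TFO
	 * cookie (and possibly data), letting the branch below send the
	 * SYNACK from the child and queue it for accept() before the
	 * three-way handshake completes.
	 */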
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) 	if (fastopen_sk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) 				    &foc, TCP_SYNACK_FASTOPEN, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) 		/* Add the child socket directly into the accept queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) 		if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) 			reqsk_fastopen_remove(fastopen_sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873) 			bh_unlock_sock(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) 			sock_put(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) 			goto drop_and_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) 		sk->sk_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) 		bh_unlock_sock(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) 		sock_put(fastopen_sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) 		tcp_rsk(req)->tfo_listener = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) 		if (!want_cookie)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) 			inet_csk_reqsk_queue_hash_add(sk, req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) 				tcp_timeout_init((struct sock *)req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) 		af_ops->send_synack(sk, dst, &fl, req, &foc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) 				    !want_cookie ? TCP_SYNACK_NORMAL :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) 						   TCP_SYNACK_COOKIE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) 				    skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889) 		if (want_cookie) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) 			reqsk_free(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) 	reqsk_put(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) drop_and_release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) 	dst_release(dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) drop_and_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) 	__reqsk_free(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) drop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) 	tcp_listendrop(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905) EXPORT_SYMBOL(tcp_conn_request);
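
/* Caller sketch (recalled from this kernel series and shown for illustration
 * only; the authoritative code lives in tcp_ipv4.c / tcp_ipv6.c and may
 * differ in detail):
 *
 *	int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 *	{
 *		// Never answer SYNs sent to broadcast or multicast addresses
 *		if (skb_rtable(skb)->rt_flags &
 *		    (RTCF_BROADCAST | RTCF_MULTICAST))
 *			goto drop;
 *
 *		return tcp_conn_request(&tcp_request_sock_ops,
 *					&tcp_request_sock_ipv4_ops, sk, skb);
 *	drop:
 *		tcp_listendrop(sk);
 *		return 0;
 *	}
 */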