// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() calls
 *		Alan Cox	:	Set the ACK bit on a reset
 *		Alan Cox	:	Stopped it crashing if it closed while
 *					sk->inuse=1 and was trying to connect
 *					(tcp_err()).
 *		Alan Cox	:	All icmp error handling was broken
 *					pointers passed where wrong and the
 *					socket was looked up backwards. Nobody
 *					tested any icmp error code obviously.
 *		Alan Cox	:	tcp_err() now handled properly. It
 *					wakes people on errors. poll
 *					behaves and the icmp error race
 *					has gone by moving it into sock.c
 *		Alan Cox	:	tcp_send_reset() fixed to work for
 *					everything not just packets for
 *					unknown sockets.
 *		Alan Cox	:	tcp option processing.
 *		Alan Cox	:	Reset tweaked (still not 100%) [Had
 *					syn rule wrong]
 *		Herp Rosmanith	:	More reset fixes
 *		Alan Cox	:	No longer acks invalid rst frames.
 *					Acking any kind of RST is right out.
 *		Alan Cox	:	Sets an ignore me flag on an rst
 *					receive otherwise odd bits of prattle
 *					escape still
 *		Alan Cox	:	Fixed another acking RST frame bug.
 *					Should stop LAN workplace lockups.
 *		Alan Cox	:	Some tidyups using the new skb list
 *					facilities
 *		Alan Cox	:	sk->keepopen now seems to work
 *		Alan Cox	:	Pulls options out correctly on accepts
 *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
 *		Alan Cox	:	PSH doesn't end a TCP read. Switched a
 *					bit to skb ops.
 *		Alan Cox	:	Tidied tcp_data to avoid a potential
 *					nasty.
 *		Alan Cox	:	Added some better commenting, as the
 *					tcp is hard to follow
 *		Alan Cox	:	Removed incorrect check for 20 * psh
 *	Michael O'Reilly	:	ack < copied bug fix.
 *	Johannes Stille		:	Misc tcp fixes (not all in yet).
 *		Alan Cox	:	FIN with no memory -> CRASH
 *		Alan Cox	:	Added socket option proto entries.
 *					Also added awareness of them to accept.
 *		Alan Cox	:	Added TCP options (SOL_TCP)
 *		Alan Cox	:	Switched wakeup calls to callbacks,
 *					so the kernel can layer network
 *					sockets.
 *		Alan Cox	:	Use ip_tos/ip_ttl settings.
 *		Alan Cox	:	Handle FIN (more) properly (we hope).
 *		Alan Cox	:	RST frames sent on unsynchronised
 *					state ack error.
 *		Alan Cox	:	Put in missing check for SYN bit.
 *		Alan Cox	:	Added tcp_select_window() aka NET2E
 *					window non shrink trick.
 *		Alan Cox	:	Added a couple of small NET2E timer
 *					fixes
 *		Charles Hedrick :	TCP fixes
 *		Toomas Tamm	:	TCP window fixes
 *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
 *		Charles Hedrick	:	Rewrote most of it to actually work
 *		Linus		:	Rewrote tcp_read() and URG handling
 *					completely
 *		Gerhard Koerting:	Fixed some missing timer handling
 *		Matthew Dillon  :	Reworked TCP machine states as per RFC
 *		Gerhard Koerting:	PC/TCP workarounds
 *		Adam Caldwell	:	Assorted timer/timing errors
 *		Matthew Dillon	:	Fixed another RST bug
 *		Alan Cox	:	Move to kernel side addressing changes.
 *		Alan Cox	:	Beginning work on TCP fastpathing
 *					(not yet usable)
 *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
 *		Alan Cox	:	TCP fast path debugging
 *		Alan Cox	:	Window clamping
 *		Michael Riepe	:	Bug in tcp_check()
 *		Matt Dillon	:	More TCP improvements and RST bug fixes
 *		Matt Dillon	:	Yet more small nasties removed from the
 *					TCP code (Be very nice to this man if
 *					tcp finally works 100%) 8)
 *		Alan Cox	:	BSD accept semantics.
 *		Alan Cox	:	Reset on closedown bug.
 *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendto().
 *		Michael Pall	:	Handle poll() after URG properly in
 *					all cases.
 *		Michael Pall	:	Undo the last fix in tcp_read_urg()
 *					(multi URG PUSH broke rlogin).
 *		Michael Pall	:	Fix the multi URG PUSH problem in
 *					tcp_readable(), poll() after URG
 *					works now.
 *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the
 *					BSD api.
 *		Alan Cox	:	Changed the semantics of sk->socket to
 *					fix a race and a signal problem with
 *					accept() and async I/O.
 *		Alan Cox	:	Relaxed the rules on tcp_sendto().
 *		Yury Shevchuk	:	Really fixed accept() blocking problem.
 *		Craig I. Hagan  :	Allow for BSD compatible TIME_WAIT for
 *					clients/servers which listen in on
 *					fixed ports.
 *		Alan Cox	:	Cleaned the above up and shrank it to
 *					a sensible code size.
 *		Alan Cox	:	Self connect lockup fix.
 *		Alan Cox	:	No connect to multicast.
 *		Ross Biro	:	Close unaccepted children on master
 *					socket close.
 *		Alan Cox	:	Reset tracing code.
 *		Alan Cox	:	Spurious resets on shutdown.
 *		Alan Cox	:	Giant 15 minute/60 second timer error
 *		Alan Cox	:	Small whoops in polling before an
 *					accept.
 *		Alan Cox	:	Kept the state trace facility since
 *					it's handy for debugging.
 *		Alan Cox	:	More reset handler fixes.
 *		Alan Cox	:	Started rewriting the code based on
 *					the RFC's for other useful protocol
 *					references see: Comer, KA9Q NOS, and
 *					for a reference on the difference
 *					between specifications and how BSD
 *					works see the 4.4lite source.
 *		A.N.Kuznetsov	:	Don't time wait on completion of tidy
 *					close.
 *		Linus Torvalds	:	Fin/Shutdown & copied_seq changes.
 *		Linus Torvalds	:	Fixed BSD port reuse to work first syn
 *		Alan Cox	:	Reimplemented timers as per the RFC
 *					and using multiple timers for sanity.
 *		Alan Cox	:	Small bug fixes, and a lot of new
 *					comments.
 *		Alan Cox	:	Fixed dual reader crash by locking
 *					the buffers (much like datagram.c)
 *		Alan Cox	:	Fixed stuck sockets in probe. A probe
 *					now gets fed up of retrying without
 *					(even a no space) answer.
 *		Alan Cox	:	Extracted closing code better
 *		Alan Cox	:	Fixed the closing state machine to
 *					resemble the RFC.
 *		Alan Cox	:	More 'per spec' fixes.
 *		Jorge Cwik	:	Even faster checksumming.
 *		Alan Cox	:	tcp_data() doesn't ack illegal PSH
 *					only frames. At least one pc tcp stack
 *					generates them.
 *		Alan Cox	:	Cache last socket.
 *		Alan Cox	:	Per route irtt.
 *		Matt Day	:	poll()->select() match BSD precisely on error
 *		Alan Cox	:	New buffers
 *		Marc Tamsky	:	Various sk->prot->retransmits and
 *					sk->retransmits misupdating fixed.
 *					Fixed tcp_write_timeout: stuck close,
 *					and TCP syn retries gets used now.
 *		Mark Yarvis	:	In tcp_read_wakeup(), don't send an
 *					ack if state is TCP_CLOSED.
 *		Alan Cox	:	Look up device on a retransmit - routes may
 *					change. Doesn't yet cope with MSS shrink right
 *					but it's a start!
 *		Marc Tamsky	:	Closing in closing fixes.
 *		Mike Shaver	:	RFC1122 verifications.
 *		Alan Cox	:	rcv_saddr errors.
 *		Alan Cox	:	Block double connect().
 *		Alan Cox	:	Small hooks for enSKIP.
 *		Alexey Kuznetsov:	Path MTU discovery.
 *		Alan Cox	:	Support soft errors.
 *		Alan Cox	:	Fix MTU discovery pathological case
 *					when the remote claims no mtu!
 *		Marc Tamsky	:	TCP_CLOSE fix.
 *		Colin (G3TNE)	:	Send a reset on syn ack replies in
 *					window but wrong (fixes NT lpd problems)
 *		Pedro Roque	:	Better TCP window handling, delayed ack.
 *		Joerg Reuter	:	No modification of locked buffers in
 *					tcp_do_retransmit()
 *		Eric Schenk	:	Changed receiver side silly window
 *					avoidance algorithm to BSD style
 *					algorithm. This doubles throughput
 *					against machines running Solaris,
 *					and seems to result in general
 *					improvement.
 *	Stefan Magdalinski	:	adjusted tcp_readable() to fix FIONREAD
 *	Willy Konynenberg	:	Transparent proxying support.
 *	Mike McLagan		:	Routing by source
 *		Keith Owens	:	Do proper merging with partial SKB's in
 *					tcp_do_sendmsg to avoid burstiness.
 *		Eric Schenk	:	Fix fast close down bug with
 *					shutdown() followed by close().
 *		Andi Kleen	:	Make poll agree with SIGIO
 *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
 *					lingertime == 0 (RFC 793 ABORT Call)
 *	Hirokazu Takahashi	:	Use copy_from_user() instead of
 *					csum_and_copy_from_user() if possible.
 *
 * Description of States:
 *
 *	TCP_SYN_SENT		sent a connection request, waiting for ack
 *
 *	TCP_SYN_RECV		received a connection request, sent ack,
 *				waiting for final ack in three-way handshake.
 *
 *	TCP_ESTABLISHED		connection established
 *
 *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
 *				transmission of remaining buffered data
 *
 *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
 *				to shutdown
 *
 *	TCP_CLOSING		both sides have shutdown but we still have
 *				data we have to finish sending
 *
 *	TCP_TIME_WAIT		timeout to catch resent junk before entering
 *				closed, can only be entered from FIN_WAIT2
 *				or CLOSING.  Required because the other end
 *				may not have gotten our last ACK causing it
 *				to retransmit the data packet (which we ignore)
 *
 *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
 *				us to finish writing our data and to shutdown
 *				(we have to close() to move on to LAST_ACK)
 *
 *	TCP_LAST_ACK		our side has shutdown after remote has
 *				shutdown. There may still be data in our
 *				buffer that we have to finish sending
 *
 *	TCP_CLOSE		socket is finished
 */
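/*
 * Illustrative example only (not an exhaustive state diagram): an
 * active-open client that sends its request and then closes first will
 * typically walk through CLOSE -> SYN_SENT -> ESTABLISHED -> FIN_WAIT1 ->
 * FIN_WAIT2 -> TIME_WAIT -> CLOSE, while its passive peer sees LISTEN ->
 * SYN_RECV -> ESTABLISHED -> CLOSE_WAIT -> LAST_ACK -> CLOSE.
 */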

#define pr_fmt(fmt) "TCP: " fmt

#include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/inet_diag.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
#include <linux/memblock.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/errqueue.h>
#include <linux/static_key.h>

#include <net/icmp.h>
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>

#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <net/busy_poll.h>

#include <trace/hooks/ipv4.h>

struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);

long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);

atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);

#if IS_ENABLED(CONFIG_SMC)
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
EXPORT_SYMBOL(tcp_have_smc);
#endif

/*
 * Current number of TCP sockets.
 */
struct percpu_counter tcp_sockets_allocated;
EXPORT_SYMBOL(tcp_sockets_allocated);

/*
 * TCP splice context
 */
struct tcp_splice_state {
	struct pipe_inode_info *pipe;
	size_t len;
	unsigned int flags;
};

/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
 */
unsigned long tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL_GPL(tcp_memory_pressure);

DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
EXPORT_SYMBOL(tcp_rx_skb_cache_key);

DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);

void tcp_enter_memory_pressure(struct sock *sk)
{
	unsigned long val;

	if (READ_ONCE(tcp_memory_pressure))
		return;
	val = jiffies;

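	/*
	 * tcp_memory_pressure stores the jiffies value at which pressure
	 * started; 0 means "no pressure".  If jiffies happens to be 0 when
	 * we get here, nudge it so the stored flag is still non-zero.
	 */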
	if (!val)
		val--;
	if (!cmpxchg(&tcp_memory_pressure, 0, val))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
}
EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);

void tcp_leave_memory_pressure(struct sock *sk)
{
	unsigned long val;

	if (!READ_ONCE(tcp_memory_pressure))
		return;
	val = xchg(&tcp_memory_pressure, 0);
	if (val)
		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
			      jiffies_to_msecs(jiffies - val));
}
EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);

/* Convert seconds to retransmits based on initial and max timeout */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	u8 res = 0;

	if (seconds > 0) {
		int period = timeout;

		res = 1;
		while (seconds > period && res < 255) {
			res++;
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return res;
}
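/*
 * Worked example (illustration only, assuming the usual defaults of a
 * 1 second initial timeout and a 120 second RTO ceiling): seconds = 10
 * walks cumulative periods 1, 1+2 = 3, 3+4 = 7, 7+8 = 15; the loop stops
 * once the period exceeds 10, so the result is 4 retransmits.
 */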

/* Convert retransmits to seconds based on initial and max timeout */
static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int period = 0;

	if (retrans > 0) {
		period = timeout;
		while (--retrans) {
			timeout <<= 1;
			if (timeout > rto_max)
				timeout = rto_max;
			period += timeout;
		}
	}
	return period;
}
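/*
 * This is the rough inverse of secs_to_retrans(): with the same assumed
 * defaults (1s initial timeout, 120s cap), retrans = 4 maps back to
 * 1 + 2 + 4 + 8 = 15 seconds, i.e. the total time covered by that many
 * exponentially backed-off retransmissions.
 */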

static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
{
	u32 rate = READ_ONCE(tp->rate_delivered);
	u32 intv = READ_ONCE(tp->rate_interval_us);
	u64 rate64 = 0;

	if (rate && intv) {
		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
		do_div(rate64, intv);
	}
	return rate64;
}
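/*
 * The result is in bytes per second.  Rough illustrative example:
 * 10 segments delivered over a 5000 us sample with an MSS of 1448 bytes
 * gives 10 * 1448 * 1000000 / 5000 = 2896000 B/s (about 23 Mbit/s).
 */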

/* Address-family independent initialization for a tcp_sock.
 *
 * NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
void tcp_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	tp->out_of_order_queue = RB_ROOT;
	sk->tcp_rtx_queue = RB_ROOT;
	tcp_init_xmit_timers(sk);
	INIT_LIST_HEAD(&tp->tsq_node);
	INIT_LIST_HEAD(&tp->tsorted_sent_queue);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	icsk->icsk_rto_min = TCP_RTO_MIN;
	icsk->icsk_delack_max = TCP_DELACK_MAX;
	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = TCP_INIT_CWND;

	/* There's a bubble in the pipe until at least the first ACK. */
	tp->app_limited = ~0U;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
	tcp_assign_congestion_control(sk);

	tp->tsoffset = 0;
	tp->rack.reo_wnd_steps = 1;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_sync_mss = tcp_sync_mss;

	WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
	WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);

	sk_sockets_allocated_inc(sk);
	sk->sk_route_forced_caps = NETIF_F_GSO;
}
EXPORT_SYMBOL(tcp_init_sock);

static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
{
	struct sk_buff *skb = tcp_write_queue_tail(sk);

	if (tsflags && skb) {
		struct skb_shared_info *shinfo = skb_shinfo(skb);
		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

		sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
		if (tsflags & SOF_TIMESTAMPING_TX_ACK)
			tcb->txstamp_ack = 1;
		if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
	}
}
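/*
 * For reference, the tsflags handled above normally originate in user
 * space; a minimal sketch (illustrative only, error handling omitted):
 *
 *	int val = SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
 *		  SOF_TIMESTAMPING_OPT_ID;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 *
 * after which timestamps are reported on the socket error queue, and
 * shinfo->tskey above identifies the last byte of the timestamped range.
 */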

static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
					  int target, struct sock *sk)
{
	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);

	if (avail > 0) {
		if (avail >= target)
			return true;
		if (tcp_rmem_pressure(sk))
			return true;
		if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
			return true;
	}
	if (sk->sk_prot->stream_memory_read)
		return sk->sk_prot->stream_memory_read(sk);
	return false;
}
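/*
 * Note on @target: it is the SO_RCVLOWAT watermark as clamped by
 * sock_rcvlowat() (1 by default).  The extra checks above deliberately
 * report readability below the watermark when the receiver is under
 * memory pressure or the advertised window has shrunk below one MSS,
 * so that a poll()-driven application with a large SO_RCVLOWAT still
 * drains the socket instead of stalling.
 */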

/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;
	const struct tcp_sock *tp = tcp_sk(sk);
	int state;

	sock_poll_wait(file, sock, wait);

	state = inet_sk_state_load(sk);
	if (state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;

	/*
	 * EPOLLHUP is certainly not done right. But poll() doesn't
	 * have a notion of HUP in just one direction, and for a
	 * socket the read side is more interesting.
	 *
	 * Some poll() documentation says that EPOLLHUP is incompatible
	 * with the EPOLLOUT/POLLWR flags, so somebody should check this
	 * all. But careful, it tends to be safer to return too many
	 * bits than too few, and you can easily break real applications
	 * if you don't tell them that something has hung up!
	 *
	 * Check-me.
	 *
	 * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
	 * our fs/select.c). It means that after we received EOF,
	 * poll always returns immediately, making it impossible to wait in
	 * poll() for write() in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
	 * if and only if shutdown has been made in both directions.
	 * Actually, it is interesting to look how Solaris and DUX
	 * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
	 * then we could set it on SND_SHUTDOWN. BTW examples given
	 * in Stevens' books assume exactly this behaviour, it explains
	 * why EPOLLHUP is incompatible with EPOLLOUT.	--ANK
	 *
	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
	 * blocking on fresh not-connected or disconnected socket. --ANK
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected or passive Fast Open socket? */
	if (state != TCP_SYN_SENT &&
	    (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
		int target = sock_rcvlowat(sk, 0, INT_MAX);

		if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->urg_data)
			target++;

		if (tcp_stream_is_readable(tp, target, sk))
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (__sk_stream_is_writeable(sk, 1)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost. Memory barrier
				 * pairs with the input side.
				 */
				smp_mb__after_atomic();
				if (__sk_stream_is_writeable(sk, 1))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		} else
			mask |= EPOLLOUT | EPOLLWRNORM;

		if (tp->urg_data & TCP_URG_VALID)
			mask |= EPOLLPRI;
	} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
		/* Active TCP fastopen socket with defer_connect
		 * Return EPOLLOUT so application can call write()
		 * in order for kernel to generate SYN+data
		 */
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	/* This barrier is coupled with smp_wmb() in tcp_reset() */
	smp_rmb();
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR;

	return mask;
}
EXPORT_SYMBOL(tcp_poll);
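/*
 * Usage note (illustrative): because RCV_SHUTDOWN sets EPOLLRDHUP above,
 * an epoll-based server can register EPOLLIN | EPOLLRDHUP and treat
 * EPOLLRDHUP as "peer sent FIN": read out whatever is still queued, then
 * shut down or close its own side.  EPOLLHUP, by contrast, only appears
 * once both directions are shut down or the socket is fully closed.
 */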

int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;
	bool slow;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		slow = lock_sock_fast(sk);
		answ = tcp_inq(sk);
		unlock_sock_fast(sk, slow);
		break;
	case SIOCATMARK:
		answ = tp->urg_data &&
		       READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
		break;
	case SIOCOUTQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = READ_ONCE(tp->write_seq) - tp->snd_una;
		break;
	case SIOCOUTQNSD:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = READ_ONCE(tp->write_seq) -
			       READ_ONCE(tp->snd_nxt);
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}
EXPORT_SYMBOL(tcp_ioctl);
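/*
 * From user space these commands are reached through ioctl(2); a minimal
 * sketch (illustrative only):
 *
 *	int unread, unacked;
 *	ioctl(fd, SIOCINQ, &unread);	// bytes queued but not yet read
 *	ioctl(fd, SIOCOUTQ, &unacked);	// bytes written but not yet ACKed
 *
 * SIOCINQ is the same value as FIONREAD; SIOCOUTQ counts everything not
 * yet acknowledged, while SIOCOUTQNSD counts only data not yet sent.
 */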

static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
	tp->pushed_seq = tp->write_seq;
}

static inline bool forced_push(const struct tcp_sock *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

static void skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	skb->csum = 0;
	tcb->seq = tcb->end_seq = tp->write_seq;
	tcb->tcp_flags = TCPHDR_ACK;
	tcb->sacked = 0;
	__skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk_wmem_queued_add(sk, skb->truesize);
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;

	tcp_slow_start_after_idle_check(sk);
}

static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
{
	if (flags & MSG_OOB)
		tp->snd_up = tp->write_seq;
}

/* If a not yet filled skb is pushed, do not send it if
 * we have data packets in Qdisc or NIC queues :
 * Because TX completion will happen shortly, it gives a chance
 * to coalesce future sendmsg() payload into this skb, without
 * need for a timer, and with no latency trade off.
 * As packets containing data payload have a bigger truesize
 * than pure acks (dataless) packets, the last checks prevent
 * autocorking if we only have an ACK in Qdisc/NIC queues,
 * or if TX completion was delayed after we processed ACK packet.
 */
static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
				int size_goal)
{
	return skb->len < size_goal &&
	       sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
	       !tcp_rtx_queue_empty(sk) &&
	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
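/*
 * Autocorking can be disabled system-wide when the coalescing trade-off
 * is not wanted, e.g. (illustrative) "sysctl -w net.ipv4.tcp_autocorking=0",
 * which makes the sysctl_tcp_autocorking check above always fail so every
 * push goes straight to __tcp_push_pending_frames().
 */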

void tcp_push(struct sock *sk, int flags, int mss_now,
	      int nonagle, int size_goal)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	skb = tcp_write_queue_tail(sk);
	if (!skb)
		return;
	if (!(flags & MSG_MORE) || forced_push(tp))
		tcp_mark_push(tp, skb);

	tcp_mark_urg(tp, flags);

	if (tcp_should_autocork(sk, skb, size_goal)) {

		/* avoid atomic op if TSQ_THROTTLED bit is already set */
		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
		}
		/* It is possible TX completion already happened
		 * before we set TSQ_THROTTLED.
		 */
		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
			return;
	}

	if (flags & MSG_MORE)
		nonagle = TCP_NAGLE_CORK;

	__tcp_push_pending_frames(sk, mss_now, nonagle);
}
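/*
 * Note on MSG_MORE: as seen above, sendmsg(..., MSG_MORE) both skips the
 * forced push and downgrades nonagle to TCP_NAGLE_CORK, so it behaves much
 * like a per-call TCP_CORK; queued data is typically flushed by a later
 * call without MSG_MORE, or once the skb fills up and forced_push() fires.
 */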
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) unsigned int offset, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) struct tcp_splice_state *tss = rd_desc->arg.data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) min(rd_desc->count, len), tss->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) rd_desc->count -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) /* Store TCP splice context information in read_descriptor_t. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) read_descriptor_t rd_desc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) .arg.data = tss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) .count = tss->len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * tcp_splice_read - splice data from TCP socket to a pipe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * @sock: socket to splice from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) * @ppos: position (not valid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) * @pipe: pipe to splice to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * @len: number of bytes to splice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * @flags: splice modifier flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * Will read pages from the given socket and fill them into a pipe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) *
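* For example, a userspace splice(2) from a TCP socket to a pipe ends up
* here through the socket's ->splice_read() operation.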
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) struct pipe_inode_info *pipe, size_t len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) struct sock *sk = sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) struct tcp_splice_state tss = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) .pipe = pipe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) .len = len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) .flags = flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) long timeo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) ssize_t spliced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) sock_rps_record_flow(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * We can't seek on a socket input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (unlikely(*ppos))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) return -ESPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) ret = spliced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) while (tss.len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) ret = __tcp_splice_read(sk, &tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) else if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) if (spliced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (sock_flag(sk, SOCK_DONE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) if (sk->sk_err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) ret = sock_error(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if (sk->sk_shutdown & RCV_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (sk->sk_state == TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) * This occurs when the user tries to read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) * from a socket that was never connected.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) ret = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (!timeo) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) /* If __tcp_splice_read() got nothing while we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) * an skb in the receive queue, we do not want to loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) * This might happen with URG data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (!skb_queue_empty(&sk->sk_receive_queue))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) sk_wait_data(sk, &timeo, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) ret = sock_intr_errno(timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) tss.len -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) spliced += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) if (!timeo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) (sk->sk_shutdown & RCV_SHUTDOWN) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) if (spliced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) return spliced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) EXPORT_SYMBOL(tcp_splice_read);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866)
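/* Allocate a transmit skb for a stream socket.  @size extra bytes (rounded
 * up to a 4-byte multiple) are made available beyond the reserved protocol
 * header room; the memory is charged to the socket unless @force_schedule
 * bypasses the normal send-buffer accounting.  Returns NULL on failure.
 */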
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) bool force_schedule)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871)
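	/* Fast path: when no extra head room is requested, try to recycle
	 * the per-socket cached transmit skb instead of allocating.
	 */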
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) if (likely(!size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) skb = sk->sk_tx_skb_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) sk->sk_tx_skb_cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) pskb_trim(skb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) skb_shinfo(skb)->tx_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) /* The TCP header must be at least 32-bit aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) size = ALIGN(size, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) if (unlikely(tcp_under_memory_pressure(sk)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) sk_mem_reclaim_partial(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) if (likely(skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) bool mem_scheduled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (force_schedule) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) mem_scheduled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) sk_forced_mem_schedule(sk, skb->truesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (likely(mem_scheduled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) skb_reserve(skb, sk->sk_prot->max_header);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) * Make sure that we have exactly size bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * available to the caller, no more, no less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) skb->reserved_tailroom = skb->end - skb->tail - size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) sk->sk_prot->enter_memory_pressure(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) sk_stream_moderate_sndbuf(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
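/* Compute how many bytes we aim to put in a single skb: one MSS when large
 * (GSO-sized) skbs are not allowed, otherwise a multiple of mss_now bounded
 * by the GSO limits and by half of the window.
 */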
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) int large_allowed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) u32 new_size_goal, size_goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (!large_allowed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) return mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) /* Note: tcp_tso_autosize() will eventually split this later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) /* We try hard to avoid divides here */
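	/* e.g. with mss_now = 1448 and tp->gso_segs = 10 the current goal is
	 * 14480 bytes; gso_segs (and hence the divide) is only recomputed when
	 * new_size_goal drops below that or grows by at least one full MSS.
	 */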
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) size_goal = tp->gso_segs * mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (unlikely(new_size_goal < size_goal ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) new_size_goal >= size_goal + mss_now)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) tp->gso_segs = min_t(u16, new_size_goal / mss_now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) sk->sk_gso_max_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) size_goal = tp->gso_segs * mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) return max(size_goal, mss_now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
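/* Return the current MSS and, through @size_goal, the per-skb byte target
 * used by the send paths; MSG_OOB disables large (GSO-sized) goals.
 */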
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) int mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) mss_now = tcp_current_mss(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return mss_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) /* In some cases, both sendpage() and sendmsg() could have added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) * an skb to the write queue, but failed to add any payload to it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) * We need to remove it to consume less memory, but more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) * importantly to be able to generate EPOLLOUT for edge-triggered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * epoll() users.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) tcp_unlink_write_queue(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (tcp_write_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) sk_wmem_free_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
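/* Core of the sendpage() path: attach the caller's page to the tail skb as
 * a page fragment (coalescing with the previous fragment when possible),
 * growing the write queue in size_goal sized chunks and pushing frames as
 * they fill up or as memory/window conditions require.
 */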
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) size_t size, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) int mss_now, size_goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) ssize_t copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) if (IS_ENABLED(CONFIG_DEBUG_VM) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) WARN_ONCE(!sendpage_ok(page),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) "page must not be a Slab page and must have page_count > 0"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) /* Wait for a connection to finish. One exception is TCP Fast Open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * (passive side) where data is allowed to be sent before a connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * is fully established.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) !tcp_passive_fastopen(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) err = sk_stream_wait_connect(sk, &timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (err != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) mss_now = tcp_send_mss(sk, &size_goal, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) err = -EPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) while (size > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct sk_buff *skb = tcp_write_queue_tail(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) int copy, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) bool can_coalesce;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) if (!skb || (copy = size_goal - skb->len) <= 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) !tcp_skb_can_collapse_to(skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) new_segment:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) if (!sk_stream_memory_free(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) tcp_rtx_and_write_queues_empty(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) #ifdef CONFIG_TLS_DEVICE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) skb_entail(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) copy = size_goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (copy > size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) copy = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) i = skb_shinfo(skb)->nr_frags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) can_coalesce = skb_can_coalesce(skb, i, page, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) if (!can_coalesce && i >= sysctl_max_skb_frags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) tcp_mark_push(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) goto new_segment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) if (!sk_wmem_schedule(sk, copy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (can_coalesce) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) get_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) skb_fill_page_desc(skb, i, page, offset, copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) if (!(flags & MSG_NO_SHARED_FRAGS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) skb->len += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) skb->data_len += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) skb->truesize += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) sk_wmem_queued_add(sk, copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) sk_mem_charge(sk, copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) skb->ip_summed = CHECKSUM_PARTIAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) TCP_SKB_CB(skb)->end_seq += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) tcp_skb_pcount_set(skb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (!copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) copied += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) offset += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) size -= copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) if (!size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) if (skb->len < size_goal || (flags & MSG_OOB))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (forced_push(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) tcp_mark_push(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) } else if (skb == tcp_send_head(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) tcp_push_one(sk, mss_now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) wait_for_space:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) tcp_push(sk, flags & ~MSG_MORE, mss_now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) TCP_NAGLE_PUSH, size_goal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) err = sk_stream_wait_memory(sk, &timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) if (err != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) mss_now = tcp_send_mss(sk, &size_goal, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (copied) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) tcp_tx_timestamp(sk, sk->sk_tsflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) if (!(flags & MSG_SENDPAGE_NOTLAST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) do_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) /* make sure we wake any edge-triggered epoll() waiter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) return sk_stream_error(sk, flags, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) EXPORT_SYMBOL_GPL(do_tcp_sendpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) size_t size, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (!(sk->sk_route_caps & NETIF_F_SG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) return sock_no_sendpage_locked(sk, page, offset, size, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) tcp_rate_check_app_limited(sk); /* is sending application-limited? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) return do_tcp_sendpages(sk, page, offset, size, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) int tcp_sendpage(struct sock *sk, struct page *page, int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) size_t size, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) ret = tcp_sendpage_locked(sk, page, offset, size, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) EXPORT_SYMBOL(tcp_sendpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) void tcp_free_fastopen_req(struct tcp_sock *tp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) if (tp->fastopen_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) kfree(tp->fastopen_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) tp->fastopen_req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
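/* Attempt a TCP Fast Open (or deferred connect) from sendmsg(): allocate a
 * fastopen request describing the user data, then drive the connect; the
 * number of bytes carried in the SYN is reported back via @copied.
 */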
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) int *copied, size_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) struct ubuf_info *uarg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) struct inet_sock *inet = inet_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) struct sockaddr *uaddr = msg->msg_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) int err, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) uaddr->sa_family == AF_UNSPEC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) if (tp->fastopen_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) return -EALREADY; /* Another Fast Open is in progress */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) sk->sk_allocation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) if (unlikely(!tp->fastopen_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) return -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) tp->fastopen_req->data = msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) tp->fastopen_req->size = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) tp->fastopen_req->uarg = uarg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (inet->defer_connect) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) err = tcp_connect(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) /* Same failure procedure as in tcp_v4/6_connect */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) inet->inet_dport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) sk->sk_route_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) err = __inet_stream_connect(sk->sk_socket, uaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) msg->msg_namelen, flags, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) /* fastopen_req could already be freed in __inet_stream_connect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) * if the connection times out or gets reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (tp->fastopen_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) *copied = tp->fastopen_req->copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) tcp_free_fastopen_req(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) inet->defer_connect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
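/* Sendmsg workhorse, called with the socket lock held: handles zerocopy
 * setup, Fast Open, repair mode, and the main copy loop that appends user
 * data to the write queue and pushes it out.
 */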
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) struct ubuf_info *uarg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) struct sockcm_cookie sockc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) int flags, err, copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) int mss_now = 0, size_goal, copied_syn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) int process_backlog = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) bool zc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) long timeo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) trace_android_rvh_tcp_sendmsg_locked(sk, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) flags = msg->msg_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) skb = tcp_write_queue_tail(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) if (!uarg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) err = -ENOBUFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) zc = sk->sk_route_caps & NETIF_F_SG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (!zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) uarg->zerocopy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) !tp->repair) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (err == -EINPROGRESS && copied_syn > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) else if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) tcp_rate_check_app_limited(sk); /* is sending application-limited? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) /* Wait for a connection to finish. One exception is TCP Fast Open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) * (passive side) where data is allowed to be sent before a connection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) * is fully established.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) !tcp_passive_fastopen(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) err = sk_stream_wait_connect(sk, &timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) if (err != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) if (unlikely(tp->repair)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (tp->repair_queue == TCP_RECV_QUEUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) copied = tcp_send_rcvq(sk, msg, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) goto out_nopush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (tp->repair_queue == TCP_NO_QUEUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) /* 'common' sending to sendq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) sockcm_init(&sockc, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) if (msg->msg_controllen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) err = sock_cmsg_send(sk, msg, &sockc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /* This should be in poll */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) /* OK, commence sending. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) mss_now = tcp_send_mss(sk, &size_goal, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) err = -EPIPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
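	/* Main copy loop: append to the tail skb while it can still grow
	 * towards size_goal, otherwise start a new segment; push out full
	 * frames and wait for memory when the socket is out of space.
	 */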
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) while (msg_data_left(msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) int copy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) skb = tcp_write_queue_tail(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) copy = size_goal - skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) bool first_skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) new_segment:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (!sk_stream_memory_free(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) if (unlikely(process_backlog >= 16)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) process_backlog = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) if (sk_flush_backlog(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) first_skb = tcp_rtx_and_write_queues_empty(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) first_skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) process_backlog++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) skb->ip_summed = CHECKSUM_PARTIAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) skb_entail(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) copy = size_goal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) /* All packets are restored as if they have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * already been sent. skb_mstamp_ns is left unset to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) * avoid a wrong RTT estimation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) if (tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) /* Try to append data to the end of skb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) if (copy > msg_data_left(msg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) copy = msg_data_left(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* Where to copy to? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) if (skb_availroom(skb) > 0 && !zc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) /* We have some space in skb head. Superb! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) copy = min_t(int, copy, skb_availroom(skb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) goto do_fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) } else if (!zc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) bool merge = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) int i = skb_shinfo(skb)->nr_frags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) struct page_frag *pfrag = sk_page_frag(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (!sk_page_frag_refill(sk, pfrag))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) if (!skb_can_coalesce(skb, i, pfrag->page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) pfrag->offset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (i >= sysctl_max_skb_frags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) tcp_mark_push(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) goto new_segment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) merge = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) copy = min_t(int, copy, pfrag->size - pfrag->offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) if (!sk_wmem_schedule(sk, copy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) pfrag->page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) pfrag->offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) /* Update the skb. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) if (merge) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) skb_fill_page_desc(skb, i, pfrag->page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) pfrag->offset, copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) page_ref_inc(pfrag->page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) pfrag->offset += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (!sk_wmem_schedule(sk, copy))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) goto wait_for_space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) if (err == -EMSGSIZE || err == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) tcp_mark_push(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) goto new_segment;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) copy = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) if (!copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) TCP_SKB_CB(skb)->end_seq += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) tcp_skb_pcount_set(skb, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) copied += copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (!msg_data_left(msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) if (unlikely(flags & MSG_EOR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) TCP_SKB_CB(skb)->eor = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) if (forced_push(tp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) tcp_mark_push(tp, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) } else if (skb == tcp_send_head(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) tcp_push_one(sk, mss_now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) wait_for_space:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) if (copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) tcp_push(sk, flags & ~MSG_MORE, mss_now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) TCP_NAGLE_PUSH, size_goal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) err = sk_stream_wait_memory(sk, &timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) if (err != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) goto do_error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) mss_now = tcp_send_mss(sk, &size_goal, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) if (copied) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) tcp_tx_timestamp(sk, sockc.tsflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) out_nopush:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) sock_zerocopy_put(uarg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) return copied + copied_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) do_error:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) skb = tcp_write_queue_tail(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) do_fault:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) tcp_remove_empty_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) if (copied + copied_syn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) sock_zerocopy_put_abort(uarg, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) err = sk_stream_error(sk, flags, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) /* make sure we wake any edge-triggered epoll() waiter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) ret = tcp_sendmsg_locked(sk, msg, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) EXPORT_SYMBOL(tcp_sendmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * Handle reading urgent data. BSD has very simple semantics for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * this, no blocking and very strange errors 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) /* No URG data to read. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) tp->urg_data == TCP_URG_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) return -EINVAL; /* Yes this is right ! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (tp->urg_data & TCP_URG_VALID) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) char c = tp->urg_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) if (!(flags & MSG_PEEK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) tp->urg_data = TCP_URG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) /* Read urgent data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) msg->msg_flags |= MSG_OOB;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) if (!(flags & MSG_TRUNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) err = memcpy_to_msg(msg, &c, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) msg->msg_flags |= MSG_TRUNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) return err ? -EFAULT : len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) /* Fixed the recv(..., MSG_OOB) behaviour. BSD docs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * the available implementations agree in this case:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * this call should never block, independent of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) * blocking state of the socket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) * Mike <pall@rz.uni-karlsruhe.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
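/* Copy the contents of the retransmit and write queues into @msg without
 * consuming them (a peek at the send queue, e.g. for TCP repair); returns
 * the number of bytes copied or an error.
 */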
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) int copied = 0, err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) /* XXX -- need to support SO_PEEK_OFF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) copied += skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) skb_queue_walk(&sk->sk_write_queue, skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) copied += skb->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) return err ?: copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) /* Clean up the receive buffer for full frames taken by the user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) * then send an ACK if necessary. COPIED is the number of bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) * tcp_recvmsg has given to the user so far; it speeds up the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * calculation of whether or not we must ACK for the sake of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * a window update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) void tcp_cleanup_rbuf(struct sock *sk, int copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) bool time_to_ack = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) if (inet_csk_ack_scheduled(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) if (/* Once-per-two-segments ACK was not sent by tcp_input.c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * If this read emptied the read buffer, we send an ACK when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) * the connection is not bidirectional, the user drained the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * receive buffer, and there was a small segment left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) * in the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) (copied > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) !inet_csk_in_pingpong_mode(sk))) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) !atomic_read(&sk->sk_rmem_alloc)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) time_to_ack = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) /* We send an ACK if we can now advertise a non-zero window
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * which has been raised "significantly".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * Even if the window is raised up to infinity, do not send a window-open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * ACK in states where we will not receive more data. It is useless.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) __u32 rcv_window_now = tcp_receive_window(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) /* Optimize, __tcp_select_window() is not cheap. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) if (2*rcv_window_now <= tp->window_clamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) __u32 new_window = __tcp_select_window(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) /* Send an ACK now if this read freed lots of space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) * in our buffer. new_window is the window we could advertise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * now; do so only if it is not smaller than the current one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * "Lots" means "at least twice" here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) if (new_window && new_window >= 2 * rcv_window_now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) time_to_ack = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) if (time_to_ack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) tcp_send_ack(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) }
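
/*
 * Illustrative sketch (not part of the build): the usual calling pattern
 * for a reader that has just copied data to user space under the socket
 * lock. It mirrors what tcp_read_sock() and tcp_recvmsg() do below;
 * "seq" and "copied" are the caller's own bookkeeping:
 *
 *	WRITE_ONCE(tp->copied_seq, seq);	// advance the consumed sequence
 *	tcp_rcv_space_adjust(sk);		// receive buffer autotuning
 *	if (copied > 0)
 *		tcp_cleanup_rbuf(sk, copied);	// may send a window update ACK
 */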
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) u32 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) offset = seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) pr_err_once("%s: found a SYN, please report !\n", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) offset--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) *off = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) return skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) /* This looks weird, but it can happen if TCP collapsing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) * split a fat GRO packet while we released the socket lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * in skb_splice_bits().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) sk_eat_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) * This routine provides an alternative to tcp_recvmsg() for routines
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * that would like to handle copying from skbuffs directly in 'sendfile'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * fashion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * Note:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * - It is assumed that the socket was locked by the caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) * - The routine does not block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * - At present, there is no support for reading OOB data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * or for 'peeking' the socket using this routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * (although both would be easy to implement).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) sk_read_actor_t recv_actor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) u32 seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) u32 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) int copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) if (sk->sk_state == TCP_LISTEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) if (offset < skb->len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) int used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) len = skb->len - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) /* Stop reading if we hit a patch of urgent data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) if (tp->urg_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) u32 urg_offset = tp->urg_seq - seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) if (urg_offset < len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) len = urg_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) used = recv_actor(desc, skb, offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) if (used <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) if (!copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) copied = used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) if (WARN_ON_ONCE(used > len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) used = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) seq += used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) copied += used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) offset += used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /* If recv_actor drops the lock (e.g. TCP splice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) * receive) the skb pointer might be invalid when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) * getting here: tcp_collapse might have deleted it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) * while aggregating skbs from the socket queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) skb = tcp_recv_skb(sk, seq - 1, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) if (!skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) /* TCP coalescing might have appended data to the skb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) * Try to splice more frags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) if (offset + 1 != skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) sk_eat_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) ++seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) sk_eat_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) if (!desc->count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) WRITE_ONCE(tp->copied_seq, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) WRITE_ONCE(tp->copied_seq, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) tcp_rcv_space_adjust(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) /* Clean up data we have read: This will do ACK frames. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) if (copied > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) tcp_recv_skb(sk, seq, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) tcp_cleanup_rbuf(sk, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) EXPORT_SYMBOL(tcp_read_sock);
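
/*
 * Illustrative sketch (not part of the build): a minimal, hypothetical
 * recv_actor for tcp_read_sock(). The actor may consume up to "len" bytes
 * starting at "offset" within the skb and returns how many it used;
 * tcp_read_sock() stops when desc->count reaches zero or the actor
 * returns <= 0. The caller must hold the socket lock:
 *
 *	static int count_bytes_actor(read_descriptor_t *desc, struct sk_buff *skb,
 *				     unsigned int offset, size_t len)
 *	{
 *		size_t want = min_t(size_t, len, desc->count);
 *
 *		desc->count -= want;
 *		return want;
 *	}
 *
 *	read_descriptor_t desc = { .count = max_bytes };
 *	int used;
 *
 *	lock_sock(sk);
 *	used = tcp_read_sock(sk, &desc, count_bytes_actor);
 *	release_sock(sk);
 */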
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) int tcp_peek_len(struct socket *sock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) return tcp_inq(sock->sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) EXPORT_SYMBOL(tcp_peek_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) int tcp_set_rcvlowat(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) int cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) cap = sk->sk_rcvbuf >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) val = min(val, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) /* Check if we need to signal EPOLLIN right now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) tcp_data_ready(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) val <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (val > sk->sk_rcvbuf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) WRITE_ONCE(sk->sk_rcvbuf, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) EXPORT_SYMBOL(tcp_set_rcvlowat);
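
/*
 * Illustrative sketch (not part of the build): the user-space side of this
 * path. SO_RCVLOWAT makes poll() report POLLIN, and a blocking recv()
 * (without MSG_WAITALL) return, only once at least min(lowat, buffer size)
 * bytes are queued; tcp_set_rcvlowat() above may grow sk_rcvbuf so that the
 * hint can actually be met. "tcp_fd" is a made-up descriptor name:
 *
 *	int lowat = 64 * 1024;
 *
 *	setsockopt(tcp_fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));
 */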
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) static const struct vm_operations_struct tcp_vm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) int tcp_mmap(struct file *file, struct socket *sock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) if (vma->vm_flags & (VM_WRITE | VM_EXEC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) /* Instruct vm_insert_page() to not mmap_read_lock(mm) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) vma->vm_flags |= VM_MIXEDMAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) vma->vm_ops = &tcp_vm_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) EXPORT_SYMBOL(tcp_mmap);
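
/*
 * Illustrative sketch (not part of the build): tcp_mmap() only accepts
 * read-only, non-executable mappings, so user space sets up the receive
 * zerocopy window roughly as below ("tcp_fd" and "map_len" are made up);
 * asking for PROT_WRITE or PROT_EXEC fails with EPERM per the check above:
 *
 *	void *addr = mmap(NULL, map_len, PROT_READ, MAP_SHARED, tcp_fd, 0);
 */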
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) u32 *offset_frag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) skb_frag_t *frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) if (unlikely(offset_skb >= skb->len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) offset_skb -= skb_headlen(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) if ((int)offset_skb < 0 || skb_has_frag_list(skb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) frag = skb_shinfo(skb)->frags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) while (offset_skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (skb_frag_size(frag) > offset_skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) *offset_frag = offset_skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) return frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) offset_skb -= skb_frag_size(frag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) ++frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) *offset_frag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) return frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
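
/*
 * Worked example (illustrative): for an skb with skb_headlen() == 100 and
 * two page frags of 4096 bytes each, offset_skb == 4200 first drops the
 * 100 linear bytes (leaving 4100), skips frag 0 (4096 bytes, leaving 4)
 * and returns frag 1 with *offset_frag == 4.
 */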
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) struct sk_buff *skb, u32 copylen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) u32 *offset, u32 *seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) unsigned long copy_address = (unsigned long)zc->copybuf_address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) struct msghdr msg = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) if (copy_address != zc->copybuf_address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) err = import_single_range(READ, (void __user *)copy_address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) copylen, &iov, &msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) zc->recv_skip_hint -= copylen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) *offset += copylen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) *seq += copylen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) return (__s32)copylen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) u32 *seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) s32 copybuf_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) if (!copylen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) /* skb is null if inq < PAGE_SIZE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) if (skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) offset = *seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) skb = tcp_recv_skb(sk, *seq, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) return zc->copybuf_len < 0 ? 0 : copylen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) unsigned long pages_to_map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) unsigned long *insert_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) u32 *length_with_pending,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) u32 *seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) struct tcp_zerocopy_receive *zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) unsigned long pages_remaining = pages_to_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) int bytes_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) /* Even if vm_insert_pages fails, it may have partially succeeded in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * mapping (some but not all of the pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) *seq += bytes_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) *insert_addr += bytes_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) /* But if vm_insert_pages did fail, we have to unroll some state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) * we speculatively touched before.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) *length_with_pending -= bytes_not_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) zc->recv_skip_hint += bytes_not_mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
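
/*
 * Worked example (illustrative): with pages_to_map == 8, if vm_insert_pages()
 * fails after mapping five pages it leaves pages_remaining == 3, so
 * 5 * PAGE_SIZE bytes still advance *seq and *insert_addr, while
 * 3 * PAGE_SIZE bytes are subtracted from *length_with_pending and given
 * back to zc->recv_skip_hint.
 */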
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) static int tcp_zerocopy_receive(struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) struct tcp_zerocopy_receive *zc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) u32 length = 0, offset, vma_len, avail_len, aligned_len, copylen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) unsigned long address = (unsigned long)zc->address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) s32 copybuf_len = zc->copybuf_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) #define PAGE_BATCH_SIZE 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) struct page *pages[PAGE_BATCH_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) const skb_frag_t *frags = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) struct sk_buff *skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) unsigned long pg_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) unsigned long curr_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) u32 seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) int inq = tcp_inq(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) zc->copybuf_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if (address & (PAGE_SIZE - 1) || address != zc->address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) if (sk->sk_state == TCP_LISTEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) return -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) sock_rps_record_flow(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) mmap_read_lock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) vma = find_vma(current->mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) avail_len = min_t(u32, vma_len, inq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) aligned_len = avail_len & ~(PAGE_SIZE - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) if (aligned_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) zap_page_range(vma, address, aligned_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) zc->length = aligned_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) zc->recv_skip_hint = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) zc->length = avail_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) zc->recv_skip_hint = avail_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) curr_addr = address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) while (length + PAGE_SIZE <= zc->length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) if (zc->recv_skip_hint < PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) u32 offset_frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) /* If we're here, finish the current batch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) if (pg_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) ret = tcp_zerocopy_vm_insert_batch(vma, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) pg_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) &curr_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) &length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) &seq, zc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) pg_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) if (zc->recv_skip_hint > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) skb = skb->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) offset = seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) skb = tcp_recv_skb(sk, seq, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) zc->recv_skip_hint = skb->len - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) frags = skb_advance_to_frag(skb, offset, &offset_frag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) if (!frags || offset_frag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) int remaining = zc->recv_skip_hint;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) while (remaining && (skb_frag_size(frags) != PAGE_SIZE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) skb_frag_off(frags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) remaining -= skb_frag_size(frags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) frags++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) zc->recv_skip_hint -= remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) pages[pg_idx] = skb_frag_page(frags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) pg_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) length += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) zc->recv_skip_hint -= PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) frags++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (pg_idx == PAGE_BATCH_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) &curr_addr, &length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) &seq, zc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) pg_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) if (pg_idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) &curr_addr, &length, &seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) zc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) /* Try to copy straggler data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) copybuf_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) if (length + copylen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) WRITE_ONCE(tp->copied_seq, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) tcp_rcv_space_adjust(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) /* Clean up data we have read: This will do ACK frames. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) tcp_recv_skb(sk, seq, &offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) tcp_cleanup_rbuf(sk, length + copylen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) if (length == zc->length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) zc->recv_skip_hint = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) zc->length = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) #endif
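
/*
 * Illustrative sketch (not part of the build): the user-space side of
 * tcp_zerocopy_receive(), assuming the region was mmap()ed as shown near
 * tcp_mmap(). The struct fields are the real UAPI ones; "tcp_fd", "addr"
 * and "map_len" are made up, and error handling is omitted:
 *
 *	struct tcp_zerocopy_receive zc = {
 *		.address = (__u64)(unsigned long)addr,	// page-aligned mmap()ed area
 *		.length	 = map_len,
 *	};
 *	socklen_t zc_len = sizeof(zc);
 *
 *	getsockopt(tcp_fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
 *	// zc.length bytes are now mapped at addr;
 *	// zc.recv_skip_hint bytes, if any, should be read with recv() instead.
 */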
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) static void tcp_update_recv_tstamps(struct sk_buff *skb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) struct scm_timestamping_internal *tss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) if (skb->tstamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) tss->ts[0] = ktime_to_timespec64(skb->tstamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) tss->ts[0] = (struct timespec64) {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) if (skb_hwtstamps(skb)->hwtstamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) tss->ts[2] = (struct timespec64) {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) /* Similar to __sock_recv_timestamp, but does not require an skb */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) struct scm_timestamping_internal *tss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) bool has_timestamping = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) if (sock_flag(sk, SOCK_RCVTSTAMP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) if (new_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) struct __kernel_timespec kts = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) .tv_sec = tss->ts[0].tv_sec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) .tv_nsec = tss->ts[0].tv_nsec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) sizeof(kts), &kts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) struct __kernel_old_timespec ts_old = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) .tv_sec = tss->ts[0].tv_sec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) .tv_nsec = tss->ts[0].tv_nsec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) sizeof(ts_old), &ts_old);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) if (new_tstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) struct __kernel_sock_timeval stv = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) .tv_sec = tss->ts[0].tv_sec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) .tv_usec = tss->ts[0].tv_nsec / 1000,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) sizeof(stv), &stv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) struct __kernel_old_timeval tv = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) .tv_sec = tss->ts[0].tv_sec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) .tv_usec = tss->ts[0].tv_nsec / 1000,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) sizeof(tv), &tv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) has_timestamping = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) tss->ts[0] = (struct timespec64) {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) has_timestamping = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) tss->ts[2] = (struct timespec64) {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) if (has_timestamping) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) tss->ts[1] = (struct timespec64) {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) if (sock_flag(sk, SOCK_TSTAMP_NEW))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) put_cmsg_scm_timestamping64(msg, tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) put_cmsg_scm_timestamping(msg, tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) }
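
/*
 * Illustrative sketch (not part of the build): the user-space counterpart
 * of tcp_recv_timestamp(). Once receive timestamps are enabled, they arrive
 * as an SCM_TIMESTAMPING control message whose ts[0] is the software stamp
 * and ts[2] the raw hardware stamp (hardware stamps additionally need
 * SOF_TIMESTAMPING_RAW_HARDWARE), matching the tss layout used above.
 * "tcp_fd" and "msg" are made up:
 *
 *	int val = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;
 *
 *	setsockopt(tcp_fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 *	recvmsg(tcp_fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_TIMESTAMPING)
 *			memcpy(&tss, CMSG_DATA(cmsg), sizeof(tss));
 */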
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) static int tcp_inq_hint(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) u32 copied_seq = READ_ONCE(tp->copied_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) int inq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) inq = rcv_nxt - copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) inq = tp->rcv_nxt - tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) /* After receiving a FIN, tell user space to continue reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * by returning a non-zero inq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) if (inq == 0 && sock_flag(sk, SOCK_DONE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) inq = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) return inq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) }
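
/*
 * Illustrative sketch (not part of the build): tcp_inq_hint() feeds the
 * TCP_CM_INQ control message that tcp_recvmsg() emits when the TCP_INQ
 * socket option is set, letting user space learn how many more bytes are
 * queued without a separate ioctl(FIONREAD). "tcp_fd" and "msg" are made up:
 *
 *	int one = 1;
 *
 *	setsockopt(tcp_fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one));
 *	recvmsg(tcp_fd, &msg, 0);
 *	// a cmsg with cmsg_level == SOL_TCP and cmsg_type == TCP_CM_INQ
 *	// now carries an int with the remaining in-queue byte count.
 */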
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) * This routine copies from a sock struct into the user buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) * Technical note: in 2.3 we work on a _locked_ socket, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) * tricks with *seq access order and skb->users are not required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * The code can probably be improved even further.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) int flags, int *addr_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) int copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) u32 peek_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) u32 *seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) unsigned long used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) int err, inq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) int target; /* Read at least this many bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) long timeo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) struct sk_buff *skb, *last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) u32 urg_hole = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) struct scm_timestamping_internal tss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) int cmsg_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) if (unlikely(flags & MSG_ERRQUEUE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) return inet_recv_error(sk, msg, len, addr_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) trace_android_rvh_tcp_recvmsg(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) (sk->sk_state == TCP_ESTABLISHED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) sk_busy_loop(sk, nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) err = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) if (sk->sk_state == TCP_LISTEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) cmsg_flags = tp->recvmsg_inq ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) timeo = sock_rcvtimeo(sk, nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) /* Urgent data needs to be handled specially. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) if (flags & MSG_OOB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) goto recv_urg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (unlikely(tp->repair)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) if (!(flags & MSG_PEEK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (tp->repair_queue == TCP_SEND_QUEUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) goto recv_sndq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) if (tp->repair_queue == TCP_NO_QUEUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) /* 'common' recv queue MSG_PEEK-ing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) seq = &tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) if (flags & MSG_PEEK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) peek_seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) seq = &peek_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) u32 offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) if (tp->urg_data && tp->urg_seq == *seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) if (signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) /* Next get a buffer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) last = skb_peek_tail(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) skb_queue_walk(&sk->sk_receive_queue, skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) last = skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /* Now that we have two receive queues this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) * shouldn't happen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) offset = *seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) pr_err_once("%s: found a SYN, please report !\n", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) offset--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) if (offset < skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) goto found_ok_skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) goto found_fin_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) WARN(!(flags & MSG_PEEK),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) /* Well, if we have backlog, try to process it now. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) if (copied) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) if (sk->sk_err ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) sk->sk_state == TCP_CLOSE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) (sk->sk_shutdown & RCV_SHUTDOWN) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) !timeo ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) if (sock_flag(sk, SOCK_DONE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) if (sk->sk_err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) copied = sock_error(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) if (sk->sk_shutdown & RCV_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) if (sk->sk_state == TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) /* This occurs when the user tries to read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * from a never-connected socket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) copied = -ENOTCONN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) if (!timeo) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) copied = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) if (signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) copied = sock_intr_errno(timeo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) tcp_cleanup_rbuf(sk, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) if (copied >= target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) /* Do not sleep, just process backlog. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) sk_wait_data(sk, &timeo, last);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) if ((flags & MSG_PEEK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) (peek_seq - copied - urg_hole != tp->copied_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) current->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) task_pid_nr(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) peek_seq = tp->copied_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) found_ok_skb:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) /* Ok so how much can we use? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) used = skb->len - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) if (len < used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) used = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) /* Do we have urgent data here? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) if (tp->urg_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) u32 urg_offset = tp->urg_seq - *seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) if (urg_offset < used) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) if (!urg_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) if (!sock_flag(sk, SOCK_URGINLINE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) WRITE_ONCE(*seq, *seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) urg_hole++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) offset++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) used--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) if (!used)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) goto skip_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) used = urg_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (!(flags & MSG_TRUNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) err = skb_copy_datagram_msg(skb, offset, msg, used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) /* Exception. Bailout! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) if (!copied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) copied = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) WRITE_ONCE(*seq, *seq + used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) copied += used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) len -= used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) tcp_rcv_space_adjust(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) skip_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) tp->urg_data = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) tcp_fast_path_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) if (TCP_SKB_CB(skb)->has_rxtstamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) tcp_update_recv_tstamps(skb, &tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) cmsg_flags |= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) if (used + offset < skb->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) goto found_fin_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (!(flags & MSG_PEEK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) sk_eat_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) found_fin_ok:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) /* Process the FIN. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) WRITE_ONCE(*seq, *seq + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) if (!(flags & MSG_PEEK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) sk_eat_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) } while (len > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) trace_android_rvh_tcp_recvmsg_stat(sk, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) /* According to UNIX98, msg_name/msg_namelen are ignored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * on a connected socket. I was just happy when I found this 8) --ANK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) /* Clean up data we have read: This will do ACK frames. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) tcp_cleanup_rbuf(sk, copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) if (cmsg_flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (cmsg_flags & 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) tcp_recv_timestamp(msg, sk, &tss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) if (cmsg_flags & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) inq = tcp_inq_hint(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) recv_urg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) err = tcp_recv_urg(sk, msg, len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) recv_sndq:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) err = tcp_peek_sndq(sk, msg, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) EXPORT_SYMBOL(tcp_recvmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) void tcp_set_state(struct sock *sk, int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) int oldstate = sk->sk_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) /* We defined a new enum for TCP states that are exported in BPF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) * so as not to force the internal TCP states to be frozen. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) * following checks will detect if an internal state value ever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) * differs from the BPF value. If this ever happens, then we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * need to remap the internal value to the BPF value before calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) * tcp_call_bpf_2arg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) switch (state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) case TCP_ESTABLISHED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) if (oldstate != TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) case TCP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) sk->sk_prot->unhash(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) if (inet_csk(sk)->icsk_bind_hash &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) inet_put_port(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) if (oldstate == TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) /* Change state AFTER socket is unhashed to avoid closed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) * socket sitting in hash tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) inet_sk_state_store(sk, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) EXPORT_SYMBOL_GPL(tcp_set_state);
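/* A minimal sketch (not part of this file) of how a BPF sock_ops
 * program could consume the BPF_SOCK_OPS_STATE_CB callback issued
 * above once BPF_SOCK_OPS_STATE_CB_FLAG has been requested; helper
 * and field names follow the BPF sock_ops API as generally documented
 * and are shown for illustration only:
 *
 *	SEC("sockops")
 *	int log_tcp_state(struct bpf_sock_ops *skops)
 *	{
 *		switch (skops->op) {
 *		case BPF_SOCK_OPS_TCP_CONNECT_CB:
 *			bpf_sock_ops_cb_flags_set(skops,
 *						  BPF_SOCK_OPS_STATE_CB_FLAG);
 *			break;
 *		case BPF_SOCK_OPS_STATE_CB:
 *			bpf_printk("tcp state %d -> %d",
 *				   skops->args[0], skops->args[1]);
 *			break;
 *		}
 *		return 1;
 *	}
 */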
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) * State processing on a close. This implements the state shift for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) * sending our FIN frame. Note that we only send a FIN for some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) * states. A shutdown() may have already sent the FIN, or we may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) * closed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) static const unsigned char new_state[16] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) /* current state: new state: action: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) [0 /* (Invalid) */] = TCP_CLOSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) [TCP_SYN_SENT] = TCP_CLOSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) [TCP_FIN_WAIT1] = TCP_FIN_WAIT1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) [TCP_FIN_WAIT2] = TCP_FIN_WAIT2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) [TCP_TIME_WAIT] = TCP_CLOSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) [TCP_CLOSE] = TCP_CLOSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) [TCP_LAST_ACK] = TCP_LAST_ACK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) [TCP_LISTEN] = TCP_CLOSE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) [TCP_CLOSING] = TCP_CLOSING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) static int tcp_close_state(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) int next = (int)new_state[sk->sk_state];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) int ns = next & TCP_STATE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) tcp_set_state(sk, ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) return next & TCP_ACTION_FIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) }
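/* For example, per the new_state[] table above, tcp_close_state() on an
 * ESTABLISHED socket moves it to FIN_WAIT1 and returns TCP_ACTION_FIN,
 * so the caller emits a FIN; on a SYN_SENT or LISTEN socket it simply
 * moves to CLOSE and returns 0, and no FIN is sent.
 */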
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) * Shutdown the sending side of a connection. Much like close except
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) * that we don't shut down receiving or sock_set_flag(sk, SOCK_DEAD).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) void tcp_shutdown(struct sock *sk, int how)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) /* We need to grab some memory, and put together a FIN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) * and then put it into the queue to be sent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) * Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) if (!(how & SEND_SHUTDOWN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) /* If we've already sent a FIN, or it's a closed state, skip this. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) if ((1 << sk->sk_state) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) (TCPF_ESTABLISHED | TCPF_SYN_SENT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) /* Clear out any half completed packets. FIN if needed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) if (tcp_close_state(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) tcp_send_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) EXPORT_SYMBOL(tcp_shutdown);
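/* A rough user-space sketch of what drives this path; illustrative
 * only, error handling omitted.  shutdown(fd, SHUT_WR) half-closes the
 * connection (our FIN goes out via tcp_send_fin() above) while reads
 * keep working until the peer's FIN arrives:
 *
 *	write(fd, req, req_len);
 *	shutdown(fd, SHUT_WR);
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		;
 */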
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) bool tcp_check_oom(struct sock *sk, int shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) bool too_many_orphans, out_of_socket_memory;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) too_many_orphans = tcp_too_many_orphans(sk, shift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) out_of_socket_memory = tcp_out_of_memory(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) if (too_many_orphans)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) net_info_ratelimited("too many orphaned sockets\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) if (out_of_socket_memory)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) return too_many_orphans || out_of_socket_memory;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) void tcp_close(struct sock *sk, long timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) int data_was_unread = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) int state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) sk->sk_shutdown = SHUTDOWN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) if (sk->sk_state == TCP_LISTEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) /* Special case. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) inet_csk_listen_stop(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) goto adjudge_to_death;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) /* We need to flush the recv. buffs. We do this only on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) * descriptor close, not protocol-sourced closes, because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) * reader process may not have drained the data yet!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) data_was_unread += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) if (sk->sk_state == TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) goto adjudge_to_death;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) /* As outlined in RFC 2525, section 2.17, we send a RST here because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) * data was lost. To witness the awful effects of the old behavior of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) * GET in an FTP client, suspend the process, wait for the client to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) * advertise a zero window, then kill -9 the FTP client, wheee...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) * Note: timeout is always zero in such a case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) if (unlikely(tcp_sk(sk)->repair)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) sk->sk_prot->disconnect(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) } else if (data_was_unread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) /* Unread data was tossed, zap the connection. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) tcp_send_active_reset(sk, sk->sk_allocation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) /* Check zero linger _after_ checking for unread data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) sk->sk_prot->disconnect(sk, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) } else if (tcp_close_state(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) /* We FIN if the application ate all the data before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) * zapping the connection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) /* RED-PEN. Formally speaking, we have broken TCP state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) * machine. State transitions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * TCP_ESTABLISHED -> TCP_FIN_WAIT1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) * TCP_CLOSE_WAIT -> TCP_LAST_ACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) * are legal only when FIN has been sent (i.e. in window),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) * rather than queued out of window. Purists blame.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) * F.e. "RFC state" is ESTABLISHED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) * if Linux state is FIN-WAIT-1, but FIN is still not sent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) * The visible deviations are that sometimes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) * we enter time-wait state, when it is not really required
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) * (harmless), do not send active resets, when they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) * they look like CLOSING or LAST_ACK to Linux)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) * Probably, I missed some more holelets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) * --ANK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) * XXX (TFO) - To start off we don't support SYN+ACK+FIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) * in a single packet! (May consider it later but will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) * probably need API support or TCP_CORK SYN-ACK until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * data is written and socket is closed.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) tcp_send_fin(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) sk_stream_wait_close(sk, timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) adjudge_to_death:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) state = sk->sk_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) sock_hold(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) sock_orphan(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) bh_lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) /* remove backlog if any, without releasing ownership. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) __release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) percpu_counter_inc(sk->sk_prot->orphan_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) /* Have we already been destroyed by a softirq or backlog? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) /* This is a (useful) BSD violation of the RFC. There is a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) * problem with TCP as specified in that the other end could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) * keep a socket open forever with no application left on this end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) * We use a 1 minute timeout (about the same as BSD) then kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) * our end. If they send after that then tough - BUT: long enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) * that we won't make the old 4*rto = almost no time - whoops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) * reset mistake.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * Nope, it was not a mistake. It is really desired behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * f.e. on http servers, when such sockets are useless, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * consume significant resources. Let's do it with special
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) * linger2 option. --ANK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) if (sk->sk_state == TCP_FIN_WAIT2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) if (tp->linger2 < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) tcp_send_active_reset(sk, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) __NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) LINUX_MIB_TCPABORTONLINGER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) const int tmo = tcp_fin_time(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) if (tmo > TCP_TIMEWAIT_LEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) inet_csk_reset_keepalive_timer(sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) tmo - TCP_TIMEWAIT_LEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) if (sk->sk_state != TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) if (tcp_check_oom(sk, 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) tcp_send_active_reset(sk, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) __NET_INC_STATS(sock_net(sk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) LINUX_MIB_TCPABORTONMEMORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) } else if (!check_net(sock_net(sk))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) /* Not possible to send reset; just close */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (sk->sk_state == TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) lockdep_sock_is_held(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) /* We could get here with a non-NULL req if the socket is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) * aborted (e.g., closed with unread data) before 3WHS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) * finishes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) reqsk_fastopen_remove(sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) inet_csk_destroy_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) /* Otherwise, socket is reprieved until protocol close. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) bh_unlock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) sock_put(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) EXPORT_SYMBOL(tcp_close);
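/* A small user-space sketch of the abortive-close handling above;
 * standard socket API, shown for illustration only.  Closing with
 * unread receive data makes the kernel send a RST
 * (LINUX_MIB_TCPABORTONCLOSE), and a zero linger time likewise skips
 * the normal FIN handshake:
 *
 *	struct linger lin = { .l_onoff = 1, .l_linger = 0 };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, sizeof(lin));
 *	close(fd);
 */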
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) /* These states need RST on ABORT according to RFC793 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) static inline bool tcp_need_reset(int state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) return (1 << state) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) static void tcp_rtx_queue_purge(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) tcp_sk(sk)->highest_sack = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) struct sk_buff *skb = rb_to_skb(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) /* Since we are deleting the whole queue, there is no need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) * list_del(&skb->tcp_tsorted_anchor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) tcp_rtx_queue_unlink(skb, sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) sk_wmem_free_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) void tcp_write_queue_purge(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) tcp_skb_tsorted_anchor_cleanup(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) sk_wmem_free_skb(sk, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) tcp_rtx_queue_purge(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) skb = sk->sk_tx_skb_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) __kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) sk->sk_tx_skb_cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) sk_mem_reclaim(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) tcp_clear_all_retrans_hints(tcp_sk(sk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) tcp_sk(sk)->packets_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) inet_csk(sk)->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) int tcp_disconnect(struct sock *sk, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) struct inet_sock *inet = inet_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) int old_state = sk->sk_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) if (old_state != TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) /* ABORT function of RFC793 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) if (old_state == TCP_LISTEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) inet_csk_listen_stop(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) } else if (unlikely(tp->repair)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) sk->sk_err = ECONNABORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) } else if (tcp_need_reset(old_state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) (tp->snd_nxt != tp->write_seq &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) /* The last check adjusts for the discrepancy of Linux wrt. RFC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * states.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) tcp_send_active_reset(sk, gfp_any());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) sk->sk_err = ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) } else if (old_state == TCP_SYN_SENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) sk->sk_err = ECONNRESET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) tcp_clear_xmit_timers(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) __skb_queue_purge(&sk->sk_receive_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) if (sk->sk_rx_skb_cache) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) __kfree_skb(sk->sk_rx_skb_cache);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) sk->sk_rx_skb_cache = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) tp->urg_data = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) tcp_write_queue_purge(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) tcp_fastopen_active_disable_ofo_check(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) skb_rbtree_purge(&tp->out_of_order_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) inet->inet_dport = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) inet_reset_saddr(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) sk->sk_shutdown = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) sock_reset_flag(sk, SOCK_DONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) tp->srtt_us = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) tp->rcv_rtt_last_tsecr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) seq = tp->write_seq + tp->max_window + 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) if (!seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) seq = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) WRITE_ONCE(tp->write_seq, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) icsk->icsk_backoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) icsk->icsk_probes_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) icsk->icsk_probes_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) icsk->icsk_rto = TCP_TIMEOUT_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) icsk->icsk_rto_min = TCP_RTO_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) icsk->icsk_delack_max = TCP_DELACK_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) tp->snd_cwnd = TCP_INIT_CWND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) tp->snd_cwnd_cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) tp->window_clamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) tp->delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) tp->delivered_ce = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) if (icsk->icsk_ca_ops->release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) icsk->icsk_ca_ops->release(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) icsk->icsk_ca_initialized = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) tcp_set_ca_state(sk, TCP_CA_Open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) tp->is_sack_reneg = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) tcp_clear_retrans(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) tp->total_retrans = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) inet_csk_delack_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) * issue in __tcp_select_window()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) __sk_dst_reset(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) dst_release(sk->sk_rx_dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) sk->sk_rx_dst = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) tcp_saved_syn_free(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) tp->compressed_ack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) tp->segs_in = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) tp->segs_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) tp->bytes_sent = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) tp->bytes_acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) tp->bytes_received = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) tp->bytes_retrans = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) tp->data_segs_in = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) tp->data_segs_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) tp->duplicate_sack[0].start_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) tp->duplicate_sack[0].end_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) tp->dsack_dups = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) tp->reord_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) tp->retrans_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) tp->sacked_out = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) tp->tlp_high_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) tp->last_oow_ack_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) /* There's a bubble in the pipe until at least the first ACK. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) tp->app_limited = ~0U;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) tp->rack.mstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) tp->rack.advanced = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) tp->rack.reo_wnd_steps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) tp->rack.last_delivered = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) tp->rack.reo_wnd_persist = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) tp->rack.dsack_seen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) tp->syn_data_acked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) tp->rx_opt.saw_tstamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) tp->rx_opt.dsack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) tp->rx_opt.num_sacks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) tp->rcv_ooopack = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) /* Clean up fastopen related fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) tcp_free_fastopen_req(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) inet->defer_connect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) tp->fastopen_client_fail = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) if (sk->sk_frag.page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) put_page(sk->sk_frag.page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) sk->sk_frag.page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) sk->sk_frag.offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) EXPORT_SYMBOL(tcp_disconnect);
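/* One user-visible way to reach tcp_disconnect() is connect() with an
 * AF_UNSPEC address, which tears down the current association so the
 * socket can be reused; a rough sketch, illustration only:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *	connect(fd, &sa, sizeof(sa));
 */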
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) static inline bool tcp_can_repair_sock(const struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) (sk->sk_state != TCP_LISTEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) struct tcp_repair_window opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) if (!tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) if (len != sizeof(opt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) if (opt.max_window < opt.snd_wnd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) if (after(opt.rcv_wup, tp->rcv_nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) tp->snd_wl1 = opt.snd_wl1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) tp->snd_wnd = opt.snd_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) tp->max_window = opt.max_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) tp->rcv_wnd = opt.rcv_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) tp->rcv_wup = opt.rcv_wup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) }
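/* Checkpoint/restore tools (e.g. CRIU) drive this while the socket is
 * in repair mode with something along these lines; "saved" stands for
 * previously dumped state and is only a placeholder:
 *
 *	struct tcp_repair_window wnd = {
 *		.snd_wl1	= saved.snd_wl1,
 *		.snd_wnd	= saved.snd_wnd,
 *		.max_window	= saved.max_window,
 *		.rcv_wnd	= saved.rcv_wnd,
 *		.rcv_wup	= saved.rcv_wup,
 *	};
 *
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, sizeof(wnd));
 */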
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) unsigned int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) struct tcp_repair_opt opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) size_t offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) while (len >= sizeof(opt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) offset += sizeof(opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) len -= sizeof(opt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) switch (opt.opt_code) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) case TCPOPT_MSS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) tp->rx_opt.mss_clamp = opt.opt_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) tcp_mtup_init(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) case TCPOPT_WINDOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) u16 snd_wscale = opt.opt_val & 0xFFFF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) u16 rcv_wscale = opt.opt_val >> 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) return -EFBIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) tp->rx_opt.snd_wscale = snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) tp->rx_opt.rcv_wscale = rcv_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) tp->rx_opt.wscale_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) case TCPOPT_SACK_PERM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) if (opt.opt_val != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) case TCPOPT_TIMESTAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (opt.opt_val != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) tp->rx_opt.tstamp_ok = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) }
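/* A sketch of the matching user-space call: an array of struct
 * tcp_repair_opt entries is passed via TCP_REPAIR_OPTIONS while the
 * socket is in repair mode.  Note how TCPOPT_WINDOW packs the send
 * scale in the low 16 bits and the receive scale in the high 16 bits,
 * mirroring the parsing above; values here are illustrative only:
 *
 *	struct tcp_repair_opt opts[] = {
 *		{ TCPOPT_MSS,		1460 },
 *		{ TCPOPT_WINDOW,	7 | (7 << 16) },
 *		{ TCPOPT_SACK_PERM,	0 },
 *		{ TCPOPT_TIMESTAMP,	0 },
 *	};
 *
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_OPTIONS, opts, sizeof(opts));
 */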
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) EXPORT_SYMBOL(tcp_tx_delay_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) static void tcp_enable_tx_delay(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) static int __tcp_tx_delay_enabled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) static_branch_enable(&tcp_tx_delay_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) pr_info("TCP_TX_DELAY enabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) /* When set, indicates that we should always queue non-full frames. Later the user clears
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) * this option and we transmit any pending partial frames in the queue. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) * meant to be used alongside sendfile() to get properly filled frames when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) * user (for example) must write out headers with a write() call first and then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) * use sendfile to send out the data parts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) * TCP_NODELAY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) static void __tcp_sock_set_cork(struct sock *sk, bool on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) if (on) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) tp->nonagle |= TCP_NAGLE_CORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) tp->nonagle &= ~TCP_NAGLE_CORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) if (tp->nonagle & TCP_NAGLE_OFF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) tp->nonagle |= TCP_NAGLE_PUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) tcp_push_pending_frames(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) void tcp_sock_set_cork(struct sock *sk, bool on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) __tcp_sock_set_cork(sk, on);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) EXPORT_SYMBOL(tcp_sock_set_cork);
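/* The classic user-space pattern the comment above describes, with the
 * cork held across the header write and the sendfile() of the body; a
 * sketch only, error handling omitted:
 *
 *	int on = 1, off = 0;
 *
 *	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
 *	write(fd, hdr, hdr_len);
 *	sendfile(fd, file_fd, NULL, file_len);
 *	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
 */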
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) /* TCP_NODELAY is weaker than TCP_CORK, so this option on a corked socket is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) * remembered, but it is not activated until the cork is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) * However, when TCP_NODELAY is set we make an explicit push, which overrides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) * even TCP_CORK for currently queued segments.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) if (on) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) tcp_push_pending_frames(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) void tcp_sock_set_nodelay(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) __tcp_sock_set_nodelay(sk, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) EXPORT_SYMBOL(tcp_sock_set_nodelay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) static void __tcp_sock_set_quickack(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) if (!val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) inet_csk_enter_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) inet_csk_exit_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) inet_csk_ack_scheduled(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) tcp_cleanup_rbuf(sk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) if (!(val & 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) inet_csk_enter_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) void tcp_sock_set_quickack(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) __tcp_sock_set_quickack(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) EXPORT_SYMBOL(tcp_sock_set_quickack);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) int tcp_sock_set_syncnt(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) if (val < 1 || val > MAX_TCP_SYNCNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) inet_csk(sk)->icsk_syn_retries = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) EXPORT_SYMBOL(tcp_sock_set_syncnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) inet_csk(sk)->icsk_user_timeout = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) EXPORT_SYMBOL(tcp_sock_set_user_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) if (val < 1 || val > MAX_TCP_KEEPIDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) tp->keepalive_time = val * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) if (sock_flag(sk, SOCK_KEEPOPEN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) u32 elapsed = keepalive_time_elapsed(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) if (tp->keepalive_time > elapsed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) elapsed = tp->keepalive_time - elapsed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) elapsed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) inet_csk_reset_keepalive_timer(sk, elapsed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) int tcp_sock_set_keepidle(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) err = tcp_sock_set_keepidle_locked(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) EXPORT_SYMBOL(tcp_sock_set_keepidle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) int tcp_sock_set_keepintvl(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) if (val < 1 || val > MAX_TCP_KEEPINTVL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) tcp_sk(sk)->keepalive_intvl = val * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) EXPORT_SYMBOL(tcp_sock_set_keepintvl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) int tcp_sock_set_keepcnt(struct sock *sk, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) if (val < 1 || val > MAX_TCP_KEEPCNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) tcp_sk(sk)->keepalive_probes = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) EXPORT_SYMBOL(tcp_sock_set_keepcnt);
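/* Putting the keepalive knobs above together from user space; typical
 * tuning, shown for illustration only (the values are arbitrary):
 *
 *	int on = 1, idle = 60, intvl = 10, cnt = 5;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
 *	setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
 */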
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) * Socket option code for TCP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) sockptr_t optval, unsigned int optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) int val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) /* These are data/string values, all the others are ints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) case TCP_CONGESTION: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) char name[TCP_CA_NAME_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) if (optlen < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) val = strncpy_from_sockptr(name, optval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) min_t(long, TCP_CA_NAME_MAX-1, optlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) if (val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) name[val] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) err = tcp_set_congestion_control(sk, name, true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) ns_capable(sock_net(sk)->user_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) CAP_NET_ADMIN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) case TCP_ULP: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) char name[TCP_ULP_NAME_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) if (optlen < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) val = strncpy_from_sockptr(name, optval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) min_t(long, TCP_ULP_NAME_MAX - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) optlen));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) if (val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) name[val] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) err = tcp_set_ulp(sk, name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) case TCP_FASTOPEN_KEY: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) __u8 *backup_key = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) /* Allow a backup key as well to facilitate key rotation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) * The first key is the active one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) if (copy_from_sockptr(key, optval, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) return tcp_fastopen_reset_cipher(net, sk, key, backup_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) /* fallthru */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) if (optlen < sizeof(int))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) if (copy_from_sockptr(&val, optval, sizeof(val)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) case TCP_MAXSEG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) /* Values greater than the interface MTU won't take effect. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) * at the point when this call is made we typically don't yet know
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) * which interface is going to be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) tp->rx_opt.user_mss = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) case TCP_NODELAY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) __tcp_sock_set_nodelay(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) case TCP_THIN_LINEAR_TIMEOUTS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) if (val < 0 || val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) tp->thin_lto = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) case TCP_THIN_DUPACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) if (val < 0 || val > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) case TCP_REPAIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) if (!tcp_can_repair_sock(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) else if (val == TCP_REPAIR_ON) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) tp->repair = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) sk->sk_reuse = SK_FORCE_REUSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) tp->repair_queue = TCP_NO_QUEUE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) } else if (val == TCP_REPAIR_OFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) tp->repair = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) sk->sk_reuse = SK_NO_REUSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) tcp_send_window_probe(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) } else if (val == TCP_REPAIR_OFF_NO_WP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) tp->repair = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) sk->sk_reuse = SK_NO_REUSE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) case TCP_REPAIR_QUEUE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) if (!tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) else if ((unsigned int)val < TCP_QUEUES_NR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) tp->repair_queue = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) case TCP_QUEUE_SEQ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) if (sk->sk_state != TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) } else if (tp->repair_queue == TCP_SEND_QUEUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) if (!tcp_rtx_queue_empty(sk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) WRITE_ONCE(tp->write_seq, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) } else if (tp->repair_queue == TCP_RECV_QUEUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) if (tp->rcv_nxt != tp->copied_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) WRITE_ONCE(tp->rcv_nxt, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) WRITE_ONCE(tp->copied_seq, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) case TCP_REPAIR_OPTIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) if (!tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) else if (sk->sk_state == TCP_ESTABLISHED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) err = tcp_repair_options_est(sk, optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) break;
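/* The repair cases above back the checkpoint/restore flow used by tools
 * such as CRIU. A rough userspace sketch of restoring sequence numbers on
 * a fresh, closed socket (illustrative only; error handling omitted, and
 * TCP_REPAIR requires CAP_NET_ADMIN as checked by tcp_can_repair_sock()):
 *
 *	int on = TCP_REPAIR_ON, off = TCP_REPAIR_OFF, q;
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
 *	q = TCP_SEND_QUEUE;
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
 *	setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, &snd_seq, sizeof(snd_seq));
 *	q = TCP_RECV_QUEUE;
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
 *	setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, &rcv_seq, sizeof(rcv_seq));
 *	// ... bind() and connect() while still in repair mode ...
 *	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
 */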
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) case TCP_CORK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) __tcp_sock_set_cork(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) case TCP_KEEPIDLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) err = tcp_sock_set_keepidle_locked(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) case TCP_KEEPINTVL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) if (val < 1 || val > MAX_TCP_KEEPINTVL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) tp->keepalive_intvl = val * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) case TCP_KEEPCNT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) if (val < 1 || val > MAX_TCP_KEEPCNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) tp->keepalive_probes = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) case TCP_SYNCNT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) if (val < 1 || val > MAX_TCP_SYNCNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) icsk->icsk_syn_retries = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) case TCP_SAVE_SYN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) /* 0: disable, 1: enable, 2: start from ether_header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) if (val < 0 || val > 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) tp->save_syn = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) case TCP_LINGER2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) if (val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) tp->linger2 = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) tp->linger2 = TCP_FIN_TIMEOUT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) tp->linger2 = val * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) case TCP_DEFER_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) /* Translate value in seconds to number of retransmits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) icsk->icsk_accept_queue.rskq_defer_accept =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) TCP_RTO_MAX / HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) case TCP_WINDOW_CLAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) if (!val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) if (sk->sk_state != TCP_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) tp->window_clamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) SOCK_MIN_RCVBUF / 2 : val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) case TCP_QUICKACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) __tcp_sock_set_quickack(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) case TCP_MD5SIG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) case TCP_MD5SIG_EXT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) case TCP_USER_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) /* Cap the maximum time, in milliseconds, that TCP will retransmit or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) * send zero-window probes before giving up and aborting the connection (ETIMEDOUT).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) if (val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) icsk->icsk_user_timeout = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) break;
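/* For example, to abort an unacknowledged connection after 30 seconds
 * rather than after the default retransmission schedule (userspace sketch,
 * not compiled here):
 *
 *	unsigned int tmo_ms = 30 * 1000;
 *	setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &tmo_ms, sizeof(tmo_ms));
 */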
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) case TCP_FASTOPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) TCPF_LISTEN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) tcp_fastopen_init_key_once(net);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) fastopen_queue_tune(sk, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) case TCP_FASTOPEN_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) if (val > 1 || val < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) if (sk->sk_state == TCP_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) tp->fastopen_connect = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) err = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) case TCP_FASTOPEN_NO_COOKIE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) if (val > 1 || val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) tp->fastopen_no_cookie = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) case TCP_TIMESTAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) if (!tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) err = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) tp->tsoffset = val - tcp_time_stamp_raw();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) case TCP_REPAIR_WINDOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) err = tcp_repair_set_window(tp, optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) case TCP_NOTSENT_LOWAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) tp->notsent_lowat = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) sk->sk_write_space(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) case TCP_INQ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) if (val > 1 || val < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) tp->recvmsg_inq = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) case TCP_TX_DELAY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) tcp_enable_tx_delay();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) tp->tcp_tx_delay = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) err = -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) unsigned int optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) if (level != SOL_TCP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) return icsk->icsk_af_ops->setsockopt(sk, level, optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) return do_tcp_setsockopt(sk, level, optname, optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) EXPORT_SYMBOL(tcp_setsockopt);
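/* tcp_setsockopt() is reached from the setsockopt(2) system call with
 * level IPPROTO_TCP (SOL_TCP). A minimal userspace sketch exercising one
 * integer-valued and one string-valued option handled above (illustrative
 * only; error handling omitted):
 *
 *	#include <netinet/in.h>
 *	#include <netinet/tcp.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "cubic", strlen("cubic"));
 */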
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) struct tcp_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) u64 stats[__TCP_CHRONO_MAX], total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) enum tcp_chrono i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) stats[i] = tp->chrono_stat[i - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) if (i == tp->chrono_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) stats[i] += tcp_jiffies32 - tp->chrono_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) stats[i] *= USEC_PER_SEC / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) total += stats[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) info->tcpi_busy_time = total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) /* Return information about the state of a TCP endpoint, in the struct tcp_info API format. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) void tcp_get_info(struct sock *sk, struct tcp_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) const struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) unsigned long rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) u32 now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) u64 rate64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) bool slow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) memset(info, 0, sizeof(*info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) if (sk->sk_type != SOCK_STREAM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) info->tcpi_state = inet_sk_state_load(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) /* Report meaningful fields for all TCP states, including listeners */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) rate = READ_ONCE(sk->sk_pacing_rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) rate64 = (rate != ~0UL) ? rate : ~0ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) info->tcpi_pacing_rate = rate64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) rate = READ_ONCE(sk->sk_max_pacing_rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) rate64 = (rate != ~0UL) ? rate : ~0ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) info->tcpi_max_pacing_rate = rate64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) info->tcpi_reordering = tp->reordering;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) info->tcpi_snd_cwnd = tp->snd_cwnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) if (info->tcpi_state == TCP_LISTEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) /* Listeners reuse (alias) these fields:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) * tcpi_unacked -> number of children ready for accept()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) * tcpi_sacked  -> maximum accept backlog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) slow = lock_sock_fast(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) info->tcpi_ca_state = icsk->icsk_ca_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) info->tcpi_retransmits = icsk->icsk_retransmits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) info->tcpi_probes = icsk->icsk_probes_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) info->tcpi_backoff = icsk->icsk_backoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) if (tp->rx_opt.tstamp_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) if (tcp_is_sack(tp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) info->tcpi_options |= TCPI_OPT_SACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) if (tp->rx_opt.wscale_ok) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) info->tcpi_options |= TCPI_OPT_WSCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) if (tp->ecn_flags & TCP_ECN_OK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) info->tcpi_options |= TCPI_OPT_ECN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) if (tp->ecn_flags & TCP_ECN_SEEN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) info->tcpi_options |= TCPI_OPT_ECN_SEEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) if (tp->syn_data_acked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) info->tcpi_options |= TCPI_OPT_SYN_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) info->tcpi_snd_mss = tp->mss_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) info->tcpi_unacked = tp->packets_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) info->tcpi_sacked = tp->sacked_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) info->tcpi_lost = tp->lost_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) info->tcpi_retrans = tp->retrans_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) now = tcp_jiffies32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) info->tcpi_rtt = tp->srtt_us >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) info->tcpi_rttvar = tp->mdev_us >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) info->tcpi_snd_ssthresh = tp->snd_ssthresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) info->tcpi_advmss = tp->advmss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) info->tcpi_rcv_space = tp->rcvq_space.space;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) info->tcpi_total_retrans = tp->total_retrans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) info->tcpi_bytes_acked = tp->bytes_acked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) info->tcpi_bytes_received = tp->bytes_received;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) tcp_get_info_chrono_stats(tp, info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) info->tcpi_segs_out = tp->segs_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) info->tcpi_segs_in = tp->segs_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) info->tcpi_min_rtt = tcp_min_rtt(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) info->tcpi_data_segs_in = tp->data_segs_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) info->tcpi_data_segs_out = tp->data_segs_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) rate64 = tcp_compute_delivery_rate(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) if (rate64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) info->tcpi_delivery_rate = rate64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) info->tcpi_delivered = tp->delivered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) info->tcpi_delivered_ce = tp->delivered_ce;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) info->tcpi_bytes_sent = tp->bytes_sent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) info->tcpi_bytes_retrans = tp->bytes_retrans;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) info->tcpi_dsack_dups = tp->dsack_dups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) info->tcpi_reord_seen = tp->reord_seen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) info->tcpi_rcv_ooopack = tp->rcv_ooopack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) info->tcpi_snd_wnd = tp->snd_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) unlock_sock_fast(sk, slow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) EXPORT_SYMBOL_GPL(tcp_get_info);
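/* tcp_get_info() backs getsockopt(TCP_INFO) (and the tcp_diag code behind
 * "ss -ti"). A userspace sketch reading a few of the fields filled in
 * above (illustrative only):
 *
 *	struct tcp_info ti;
 *	socklen_t len = sizeof(ti);
 *	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
 *		printf("rtt=%uus cwnd=%u retrans=%u\n",
 *		       ti.tcpi_rtt, ti.tcpi_snd_cwnd, ti.tcpi_total_retrans);
 */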
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) static size_t tcp_opt_stats_get_size(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) const struct sk_buff *orig_skb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) const struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) struct sk_buff *stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) struct tcp_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) unsigned long rate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) u64 rate64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) if (!stats)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) tcp_get_info_chrono_stats(tp, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) nla_put_u64_64bit(stats, TCP_NLA_BUSY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) info.tcpi_busy_time, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) info.tcpi_rwnd_limited, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) info.tcpi_sndbuf_limited, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) tp->data_segs_out, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) tp->total_retrans, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) rate = READ_ONCE(sk->sk_pacing_rate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) rate64 = (rate != ~0UL) ? rate : ~0ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) rate64 = tcp_compute_delivery_rate(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) max_t(int, 0, tp->write_seq - tp->snd_nxt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) TCP_NLA_PAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) return stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) }
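/* The skb built above is attached to a TX timestamp when the socket has
 * requested SOF_TIMESTAMPING_OPT_STATS; userspace receives it as a
 * SCM_TIMESTAMPING_OPT_STATS control message on the error queue and walks
 * the TCP_NLA_* attributes. A rough sketch of requesting the statistics
 * (illustrative only; attribute parsing omitted):
 *
 *	unsigned int flags = SOF_TIMESTAMPING_SOFTWARE |
 *			     SOF_TIMESTAMPING_TX_ACK |
 *			     SOF_TIMESTAMPING_OPT_STATS;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
 *	// ... send data, then recvmsg(fd, &msg, MSG_ERRQUEUE) and look for a
 *	// cmsg with cmsg_level == SOL_SOCKET and
 *	// cmsg_type == SCM_TIMESTAMPING_OPT_STATS ...
 */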
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) static int do_tcp_getsockopt(struct sock *sk, int level,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) int optname, char __user *optval, int __user *optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) struct tcp_sock *tp = tcp_sk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) struct net *net = sock_net(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) int val, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) len = min_t(unsigned int, len, sizeof(int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) switch (optname) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) case TCP_MAXSEG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) val = tp->mss_cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) val = tp->rx_opt.user_mss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) if (tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) val = tp->rx_opt.mss_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) case TCP_NODELAY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) val = !!(tp->nonagle&TCP_NAGLE_OFF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) case TCP_CORK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) val = !!(tp->nonagle&TCP_NAGLE_CORK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) case TCP_KEEPIDLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) val = keepalive_time_when(tp) / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) case TCP_KEEPINTVL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) val = keepalive_intvl_when(tp) / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) case TCP_KEEPCNT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) val = keepalive_probes(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) case TCP_SYNCNT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) case TCP_LINGER2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) val = tp->linger2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) if (val >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) case TCP_DEFER_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) case TCP_WINDOW_CLAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) val = tp->window_clamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) case TCP_INFO: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) struct tcp_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) tcp_get_info(sk, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) len = min_t(unsigned int, len, sizeof(info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) if (copy_to_user(optval, &info, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) case TCP_CC_INFO: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) const struct tcp_congestion_ops *ca_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) union tcp_cc_info info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) size_t sz = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) int attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) ca_ops = icsk->icsk_ca_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) if (ca_ops && ca_ops->get_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) sz = ca_ops->get_info(sk, ~0U, &attr, &info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) len = min_t(unsigned int, len, sz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) if (copy_to_user(optval, &info, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) case TCP_QUICKACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) val = !inet_csk_in_pingpong_mode(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) case TCP_CONGESTION:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) case TCP_ULP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) if (!icsk->icsk_ulp_ops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) if (put_user(0, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) case TCP_FASTOPEN_KEY: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) unsigned int key_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) key_len = tcp_fastopen_get_cipher(net, icsk, key) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) TCP_FASTOPEN_KEY_LENGTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) len = min_t(unsigned int, len, key_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) if (copy_to_user(optval, key, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) case TCP_THIN_LINEAR_TIMEOUTS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) val = tp->thin_lto;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) case TCP_THIN_DUPACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) case TCP_REPAIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) val = tp->repair;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) case TCP_REPAIR_QUEUE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) if (tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) val = tp->repair_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) case TCP_REPAIR_WINDOW: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) struct tcp_repair_window opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) if (len != sizeof(opt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) if (!tp->repair)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) opt.snd_wl1 = tp->snd_wl1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) opt.snd_wnd = tp->snd_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) opt.max_window = tp->max_window;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) opt.rcv_wnd = tp->rcv_wnd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) opt.rcv_wup = tp->rcv_wup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) if (copy_to_user(optval, &opt, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) case TCP_QUEUE_SEQ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) if (tp->repair_queue == TCP_SEND_QUEUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) val = tp->write_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) else if (tp->repair_queue == TCP_RECV_QUEUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) val = tp->rcv_nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) case TCP_USER_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) val = icsk->icsk_user_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) case TCP_FASTOPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) val = icsk->icsk_accept_queue.fastopenq.max_qlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) case TCP_FASTOPEN_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) val = tp->fastopen_connect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) case TCP_FASTOPEN_NO_COOKIE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) val = tp->fastopen_no_cookie;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) case TCP_TX_DELAY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) val = tp->tcp_tx_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) case TCP_TIMESTAMP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) val = tcp_time_stamp_raw() + tp->tsoffset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) case TCP_NOTSENT_LOWAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) val = tp->notsent_lowat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) case TCP_INQ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) val = tp->recvmsg_inq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) case TCP_SAVE_SYN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) val = tp->save_syn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) case TCP_SAVED_SYN: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) if (tp->saved_syn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) if (len < tcp_saved_syn_len(tp->saved_syn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) if (put_user(tcp_saved_syn_len(tp->saved_syn),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) optlen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) len = tcp_saved_syn_len(tp->saved_syn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) if (put_user(len, optlen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) if (copy_to_user(optval, tp->saved_syn->data, len)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) tcp_saved_syn_free(tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) }
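/* TCP_SAVE_SYN and TCP_SAVED_SYN pair up from userspace: enable saving on
 * the listener, then fetch the saved SYN headers (once) from the accepted
 * socket. If the buffer is too small, the code above fails with EINVAL and
 * writes the required length back through optlen. Illustrative sketch only:
 *
 *	int save = 1;
 *	setsockopt(listen_fd, IPPROTO_TCP, TCP_SAVE_SYN, &save, sizeof(save));
 *	// ... accept() a connection ...
 *	unsigned char syn[512];
 *	socklen_t len = sizeof(syn);
 *	if (getsockopt(conn_fd, IPPROTO_TCP, TCP_SAVED_SYN, syn, &len) == 0)
 *		;	// the first "len" bytes hold the saved headers
 */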
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) case TCP_ZEROCOPY_RECEIVE: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) struct tcp_zerocopy_receive zc = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) if (get_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) if (len < 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) len < offsetofend(struct tcp_zerocopy_receive, length))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) if (len > sizeof(zc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) len = sizeof(zc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) if (copy_from_user(&zc, optval, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) err = tcp_zerocopy_receive(sk, &zc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) if (len >= offsetofend(struct tcp_zerocopy_receive, err))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) goto zerocopy_rcv_sk_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) switch (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) case offsetofend(struct tcp_zerocopy_receive, err):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) goto zerocopy_rcv_sk_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) case offsetofend(struct tcp_zerocopy_receive, inq):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) goto zerocopy_rcv_inq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) case offsetofend(struct tcp_zerocopy_receive, length):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) goto zerocopy_rcv_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) zerocopy_rcv_sk_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) zc.err = sock_error(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) zerocopy_rcv_inq:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) zc.inq = tcp_inq_hint(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) zerocopy_rcv_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) if (!err && copy_to_user(optval, &zc, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) #endif
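/* Userspace drives TCP_ZEROCOPY_RECEIVE by passing a struct
 * tcp_zerocopy_receive pointing at a region it previously mmap()ed on the
 * socket; tcp_zerocopy_receive() maps received pages there and reports how
 * much was mapped. A rough sketch, assuming the usual address/length/
 * recv_skip_hint fields (illustrative only):
 *
 *	struct tcp_zerocopy_receive zc = {
 *		.address = (__u64)(unsigned long)mapped_area,
 *		.length  = mapped_len,
 *	};
 *	socklen_t zc_len = sizeof(zc);
 *	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len) == 0)
 *		;	// zc.length bytes are mapped; zc.recv_skip_hint bytes
 *			// should be read with recv() instead
 */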
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) return -ENOPROTOOPT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) if (put_user(len, optlen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) if (copy_to_user(optval, &val, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) int __user *optlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) struct inet_connection_sock *icsk = inet_csk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) if (level != SOL_TCP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) return icsk->icsk_af_ops->getsockopt(sk, level, optname,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) return do_tcp_getsockopt(sk, level, optname, optval, optlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) EXPORT_SYMBOL(tcp_getsockopt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) #ifdef CONFIG_TCP_MD5SIG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) static DEFINE_MUTEX(tcp_md5sig_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) static bool tcp_md5sig_pool_populated = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) static void __tcp_alloc_md5sig_pool(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) struct crypto_ahash *hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) if (IS_ERR(hash))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) struct ahash_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) if (!scratch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) sizeof(struct tcphdr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) GFP_KERNEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) cpu_to_node(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) if (!scratch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) req = ahash_request_alloc(hash, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) ahash_request_set_callback(req, 0, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) 	/* Before setting tcp_md5sig_pool_populated, we must commit all writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) 	 * to memory. Paired with the smp_rmb() in tcp_get_md5sig_pool().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) tcp_md5sig_pool_populated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) bool tcp_alloc_md5sig_pool(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) if (unlikely(!tcp_md5sig_pool_populated)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) mutex_lock(&tcp_md5sig_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) if (!tcp_md5sig_pool_populated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) __tcp_alloc_md5sig_pool();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) if (tcp_md5sig_pool_populated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) static_branch_inc(&tcp_md5_needed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) mutex_unlock(&tcp_md5sig_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) return tcp_md5sig_pool_populated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
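
/* Publication ordering (illustrative): __tcp_alloc_md5sig_pool() writes the
 * per-CPU scratch buffers and ahash requests, issues smp_wmb(), and only then
 * sets tcp_md5sig_pool_populated. tcp_get_md5sig_pool() tests the flag first
 * and issues smp_rmb() before touching the per-CPU data, so a reader that
 * sees the flag set is guaranteed to see fully initialised pool state.
 */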
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) * tcp_get_md5sig_pool - get md5sig_pool for this user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045)  * We use a per-CPU structure, so on success we return with preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046)  * and BH disabled, to make sure no other thread or softirq handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047)  * can grab the same context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) if (tcp_md5sig_pool_populated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) return this_cpu_ptr(&tcp_md5sig_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) EXPORT_SYMBOL(tcp_get_md5sig_pool);
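
/* Typical caller pattern (sketch; transport-header hashing and error handling
 * are abbreviated, and "skb", "th", "key" and the 16-byte output buffer
 * "md5_hash" are the caller's own variables):
 *
 *	struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
 *
 *	if (hp) {
 *		if (!crypto_ahash_init(hp->md5_req) &&
 *		    !tcp_md5_hash_skb_data(hp, skb, th->doff << 2) &&
 *		    !tcp_md5_hash_key(hp, key)) {
 *			ahash_request_set_crypt(hp->md5_req, NULL, md5_hash, 0);
 *			crypto_ahash_final(hp->md5_req);
 *		}
 *		tcp_put_md5sig_pool();
 *	}
 *
 * tcp_put_md5sig_pool() simply re-enables BH, undoing the local_bh_disable()
 * taken above.
 */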
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062)
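/* Feed an skb's payload into the MD5 hash state in @hp: first the linear head
 * beyond @header_len, then every page fragment, then (recursively) each skb on
 * the frag list. Returns 0 on success, 1 if a hash update fails.
 */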
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) const struct sk_buff *skb, unsigned int header_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) struct scatterlist sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) const struct tcphdr *tp = tcp_hdr(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) struct ahash_request *req = hp->md5_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) const unsigned int head_data_len = skb_headlen(skb) > header_len ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) skb_headlen(skb) - header_len : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) const struct skb_shared_info *shi = skb_shinfo(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) struct sk_buff *frag_iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) sg_init_table(&sg, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) ahash_request_set_crypt(req, &sg, NULL, head_data_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) if (crypto_ahash_update(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) for (i = 0; i < shi->nr_frags; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) const skb_frag_t *f = &shi->frags[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) unsigned int offset = skb_frag_off(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) sg_set_page(&sg, page, skb_frag_size(f),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) offset_in_page(offset));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) if (crypto_ahash_update(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) skb_walk_frags(skb, frag_iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) EXPORT_SYMBOL(tcp_md5_hash_skb_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) struct scatterlist sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) sg_init_one(&sg, key->key, keylen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) /* We use data_race() because tcp_md5_do_add() might change key->key under us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) return data_race(crypto_ahash_update(hp->md5_req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) EXPORT_SYMBOL(tcp_md5_hash_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116)
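/* Final transition to TCP_CLOSE: stop the transmit timers, detach any pending
 * TCP Fast Open request, mark the socket shut down in both directions, then
 * either wake up the owning process or, for an already orphaned socket,
 * destroy it.
 */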
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) void tcp_done(struct sock *sk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) struct request_sock *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) 	/* We might be called with a new socket, after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) 	 * inet_csk_prepare_forced_close() has been called,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) 	 * so we cannot use lockdep_sock_is_held(sk).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) tcp_clear_xmit_timers(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) reqsk_fastopen_remove(sk, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) sk->sk_shutdown = SHUTDOWN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) if (!sock_flag(sk, SOCK_DEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) sk->sk_state_change(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) inet_csk_destroy_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) EXPORT_SYMBOL_GPL(tcp_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143)
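/* Forcibly terminate a socket with the given error, used for administrative
 * socket destruction (e.g. the inet_diag SOCK_DESTROY request issued by
 * "ss -K").
 */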
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) int tcp_abort(struct sock *sk, int err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) if (!sk_fullsock(sk)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) if (sk->sk_state == TCP_NEW_SYN_RECV) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) struct request_sock *req = inet_reqsk(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) inet_csk_reqsk_queue_drop(req->rsk_listener, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) /* Don't race with userspace socket closes such as tcp_close. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) if (sk->sk_state == TCP_LISTEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) tcp_set_state(sk, TCP_CLOSE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) inet_csk_listen_stop(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) /* Don't race with BH socket closes such as inet_csk_listen_stop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) local_bh_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) bh_lock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) if (!sock_flag(sk, SOCK_DEAD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) sk->sk_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) /* This barrier is coupled with smp_rmb() in tcp_poll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) sk->sk_error_report(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) if (tcp_need_reset(sk->sk_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) tcp_send_active_reset(sk, GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) tcp_done(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) bh_unlock_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) local_bh_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) tcp_write_queue_purge(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) release_sock(sk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) EXPORT_SYMBOL_GPL(tcp_abort);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) extern struct tcp_congestion_ops tcp_reno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) static __initdata unsigned long thash_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) static int __init set_thash_entries(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) if (!str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) ret = kstrtoul(str, 0, &thash_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) __setup("thash_entries=", set_thash_entries);
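
/* "thash_entries" is a kernel boot parameter; booting with e.g.
 *
 *	thash_entries=131072
 *
 * overrides the automatic sizing of the established-connections hash table
 * set up in tcp_init() below.
 */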
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) static void __init tcp_init_mem(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) unsigned long limit = nr_free_buffer_pages() / 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) limit = max(limit, 128UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) sysctl_tcp_mem[1] = limit; /* 6.25 % */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) }
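
/* Worked example (illustrative): with 1 GiB visible to
 * nr_free_buffer_pages(), i.e. 262144 pages of 4 KiB, limit is 16384 pages
 * and tcp_mem becomes { 12288, 16384, 24576 } pages, roughly
 * { 48, 64, 96 } MiB - the 4.68 / 6.25 / 9.37 % figures noted above.
 */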
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) void __init tcp_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) int max_rshare, max_wshare, cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) unsigned long limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) sizeof_field(struct sk_buff, cb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) inet_hashinfo_init(&tcp_hashinfo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) 			    thash_entries, 21,  /* one slot per 2 MB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) 0, 64 * 1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) tcp_hashinfo.bind_bucket_cachep =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) kmem_cache_create("tcp_bind_bucket",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) sizeof(struct inet_bind_bucket), 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) /* Size and allocate the main established and bind bucket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) * hash tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) * The methodology is similar to that of the buffer cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) tcp_hashinfo.ehash =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) alloc_large_system_hash("TCP established",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) sizeof(struct inet_ehash_bucket),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) thash_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) 17, /* one slot per 128 KB of memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) &tcp_hashinfo.ehash_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) thash_entries ? 0 : 512 * 1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) if (inet_ehash_locks_alloc(&tcp_hashinfo))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) panic("TCP: failed to alloc ehash_locks");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) tcp_hashinfo.bhash =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) alloc_large_system_hash("TCP bind",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) sizeof(struct inet_bind_hashbucket),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) tcp_hashinfo.ehash_mask + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) 17, /* one slot per 128 KB of memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) &tcp_hashinfo.bhash_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) 64 * 1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) spin_lock_init(&tcp_hashinfo.bhash[i].lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) cnt = tcp_hashinfo.ehash_mask + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) sysctl_tcp_max_orphans = cnt / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) tcp_init_mem();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) /* Set per-socket limits to no more than 1/128 the pressure threshold */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) max_wshare = min(4UL*1024*1024, limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) max_rshare = min(6UL*1024*1024, limit);
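	/* limit is total page memory in bytes divided by 128, so with the
	 * 1 GiB example above it is 8 MiB; a single socket may then use at
	 * most min(4 MiB, limit) for sending and min(6 MiB, limit) for
	 * receiving, applied below as the tcp_wmem[2] / tcp_rmem[2] maxima.
	 */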
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) pr_info("Hash tables configured (established %u bind %u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) tcp_v4_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) tcp_metrics_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) tcp_tasklet_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) mptcp_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) }