Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * Shared application/kernel submission and completion ring pairs, for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * supporting fast/efficient IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  * A note on the read/write ordering memory barriers that are matched between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * the application and kernel side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  * After the application reads the CQ ring tail, it must use an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  * before writing the tail (using smp_load_acquire to read the tail will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  * do). It also needs a smp_mb() before updating CQ head (ordering the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  * entry load(s) with the head store), pairing with an implicit barrier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  * through a control-dependency in io_get_cqring (smp_store_release to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  * store head will do). Failure to do so could lead to reading invalid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  * CQ entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  * Likewise, the application must use an appropriate smp_wmb() before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  * writing the SQ tail (ordering SQ entry stores with the tail store),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20)  * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21)  * to store the tail will do). And it needs a barrier ordering the SQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22)  * head load before writing new SQ entries (smp_load_acquire to read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23)  * head will do).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25)  * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27)  * updating the SQ tail; a full memory barrier smp_mb() is needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  * between.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30)  * Also see the examples in the liburing library:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  *	git://git.kernel.dk/liburing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  * from data shared between the kernel and application. This is done both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  * for ordering purposes, but also to ensure that once a value is loaded from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  * data that the application could potentially modify, it remains stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  * Copyright (C) 2018-2019 Jens Axboe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  * Copyright (c) 2018-2019 Christoph Hellwig
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) #include <linux/compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) #include <net/compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) #include <linux/refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) #include <linux/uio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) #include <linux/bits.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #include <linux/fdtable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) #include <linux/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) #include <linux/percpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) #include <linux/bvec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) #include <linux/net.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) #include <net/sock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) #include <net/af_unix.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) #include <net/scm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) #include <linux/anon_inodes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) #include <linux/nospec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) #include <linux/sizes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) #include <linux/hugetlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) #include <linux/namei.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) #include <linux/fsnotify.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) #include <linux/fadvise.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) #include <linux/eventpoll.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) #include <linux/fs_struct.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) #include <linux/splice.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) #include <linux/task_work.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) #include <linux/io_uring.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) #include <linux/blk-cgroup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) #include <linux/audit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) #include <trace/events/io_uring.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) #include <uapi/linux/io_uring.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) #include "io-wq.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) #define IORING_MAX_ENTRIES	32768
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98)  * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) #define IORING_FILE_TABLE_SHIFT	9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) #define IORING_MAX_FILES_TABLE	(1U << IORING_FILE_TABLE_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) #define IORING_FILE_TABLE_MASK	(IORING_MAX_FILES_TABLE - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) #define IORING_MAX_FIXED_FILES	(64 * IORING_MAX_FILES_TABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 				 IORING_REGISTER_LAST + IORING_OP_LAST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) struct io_uring {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	u32 head ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	u32 tail ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113)  * This data is shared with the application through the mmap at offsets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114)  * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116)  * The offsets to the member fields are published through struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117)  * io_sqring_offsets when calling io_uring_setup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) struct io_rings {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	 * Head and tail offsets into the ring; the offsets need to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	 * masked to get valid indices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	 * The kernel controls head of the sq ring and the tail of the cq ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 	 * and the application controls tail of the sq ring and the head of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	 * cq ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 	struct io_uring		sq, cq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	 * Bitmasks to apply to head and tail offsets (constant, equals
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	 * ring_entries - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	u32			sq_ring_mask, cq_ring_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	/* Ring sizes (constant, power of 2) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 	u32			sq_ring_entries, cq_ring_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	 * Number of invalid entries dropped by the kernel due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	 * invalid index stored in array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 	 * Written by the kernel, shouldn't be modified by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	 * application (i.e. get number of "new events" by comparing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	 * cached value).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	 * After a new SQ head value was read by the application this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	 * counter includes all submissions that were dropped reaching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 	 * the new SQ head (and possibly more).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	u32			sq_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	 * Runtime SQ flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	 * Written by the kernel, shouldn't be modified by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	 * application.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	 * The application needs a full memory barrier before checking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	 * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 	u32			sq_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	 * Runtime CQ flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	 * Written by the application, shouldn't be modified by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	 * kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 	u32                     cq_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	 * Number of completion events lost because the queue was full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	 * this should be avoided by the application by making sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 	 * there are not more requests pending than there is space in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	 * the completion queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	 * Written by the kernel, shouldn't be modified by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 	 * application (i.e. get number of "new events" by comparing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	 * cached value).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 	 * As completion events come in out of order this counter is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 	 * ordered with any other data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	u32			cq_overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	 * Ring buffer of completion events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	 * The kernel writes completion events fresh every time they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	 * produced, so the application is allowed to modify pending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	 * entries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) struct io_mapped_ubuf {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 	u64		ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 	size_t		len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 	struct		bio_vec *bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 	unsigned int	nr_bvecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 	unsigned long	acct_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) struct fixed_file_table {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 	struct file		**files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) struct fixed_file_ref_node {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	struct percpu_ref		refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 	struct list_head		node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	struct list_head		file_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	struct fixed_file_data		*file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	struct llist_node		llist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	bool				done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) struct fixed_file_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	struct fixed_file_table		*table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	struct io_ring_ctx		*ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	struct fixed_file_ref_node	*node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	struct percpu_ref		refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	struct completion		done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	struct list_head		ref_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	spinlock_t			lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) struct io_buffer {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 	__u64 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 	__u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 	__u16 bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) struct io_restriction {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 	u8 sqe_flags_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	u8 sqe_flags_required;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	bool registered;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) struct io_sq_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	refcount_t		refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	struct mutex		lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	/* ctx's that are using this sqd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	struct list_head	ctx_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	struct list_head	ctx_new_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	struct mutex		ctx_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	struct task_struct	*thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	struct wait_queue_head	wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) struct io_ring_ctx {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 		struct percpu_ref	refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 		unsigned int		flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		unsigned int		compat: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 		unsigned int		limit_mem: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 		unsigned int		cq_overflow_flushed: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 		unsigned int		drain_next: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 		unsigned int		eventfd_async: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 		unsigned int		restricted: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 		unsigned int		sqo_dead: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 		 * Ring buffer of indices into array of io_uring_sqe, which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 		 * mmapped by the application using the IORING_OFF_SQES offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		 * This indirection could e.g. be used to assign fixed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 		 * io_uring_sqe entries to operations and only submit them to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		 * the queue when needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 		 * The kernel modifies neither the indices array nor the entries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 		 * array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 		u32			*sq_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 		unsigned		cached_sq_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 		unsigned		sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 		unsigned		sq_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 		unsigned		sq_thread_idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 		unsigned		cached_sq_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 		unsigned		cached_cq_overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 		unsigned long		sq_check_overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 		struct list_head	defer_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 		struct list_head	timeout_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 		struct list_head	cq_overflow_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 		struct io_uring_sqe	*sq_sqes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	struct io_rings	*rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	/* IO offload */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	struct io_wq		*io_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	 * For SQPOLL usage - we hold a reference to the parent task, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	 * have access to the ->files
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 	struct task_struct	*sqo_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 	/* Only used for accounting purposes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 	struct mm_struct	*mm_account;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	struct cgroup_subsys_state	*sqo_blkcg_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	struct wait_queue_head	sqo_sq_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	struct wait_queue_entry	sqo_wait_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	struct list_head	sqd_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 	 * readers must ensure that ->refs is alive as long as the file* is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 	 * used. Only updated through io_uring_register(2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	struct fixed_file_data	*file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	unsigned		nr_user_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	/* if used, fixed mapped user buffers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 	unsigned		nr_user_bufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	struct io_mapped_ubuf	*user_bufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	struct user_struct	*user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	const struct cred	*creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	kuid_t			loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	unsigned int		sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	struct completion	ref_comp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	struct completion	sq_thread_comp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	/* if all else fails... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	struct io_kiocb		*fallback_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	struct socket		*ring_sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	struct xarray		io_buffers;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	struct xarray		personalities;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 	u32			pers_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 		unsigned		cached_cq_tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 		unsigned		cq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 		unsigned		cq_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		atomic_t		cq_timeouts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 		unsigned		cq_last_tm_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		unsigned long		cq_check_overflow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		struct wait_queue_head	cq_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		struct fasync_struct	*cq_fasync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		struct eventfd_ctx	*cq_ev_fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 		struct mutex		uring_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		wait_queue_head_t	wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 		spinlock_t		completion_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 		 * ->iopoll_list is protected by the ctx->uring_lock for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 		 * For SQPOLL, only the single threaded io_sq_thread() will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 		 * manipulate the list, hence no extra locking is needed there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 		struct list_head	iopoll_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 		struct hlist_head	*cancel_hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 		unsigned		cancel_hash_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 		bool			poll_multi_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 		spinlock_t		inflight_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 		struct list_head	inflight_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	} ____cacheline_aligned_in_smp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 	struct delayed_work		file_put_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	struct llist_head		file_put_llist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	struct work_struct		exit_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	struct io_restriction		restrictions;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395)  * First field must be the file pointer in all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396)  * iocb unions! See also 'struct kiocb' in <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) struct io_poll_iocb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 	union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 		struct wait_queue_head	*head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 		u64			addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	__poll_t			events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	bool				done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	bool				canceled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	struct wait_queue_entry		wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) struct io_close {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	struct file			*put_file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	int				fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) struct io_timeout_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	struct io_kiocb			*req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	struct hrtimer			timer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 	struct timespec64		ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	enum hrtimer_mode		mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) struct io_accept {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	struct sockaddr __user		*addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	int __user			*addr_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	int				flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	unsigned long			nofile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) struct io_sync {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	loff_t				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	loff_t				off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	int				flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	int				mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) struct io_cancel {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	u64				addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) struct io_timeout {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	u32				off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	u32				target_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	struct list_head		list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) struct io_timeout_rem {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 	u64				addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) struct io_rw {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	/* NOTE: kiocb has the file as the first member, so don't do it here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	struct kiocb			kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	u64				addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	u64				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) struct io_connect {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	struct sockaddr __user		*addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	int				addr_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) struct io_sr_msg {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 		struct user_msghdr __user *umsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 		void __user		*buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	int				msg_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	int				bgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	size_t				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	struct io_buffer		*kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) struct io_open {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	int				dfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	bool				ignore_nonblock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	struct filename			*filename;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	struct open_how			how;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	unsigned long			nofile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) struct io_files_update {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	u64				arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	u32				nr_args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	u32				offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) struct io_fadvise {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 	u64				offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	u32				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	u32				advice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) struct io_madvise {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 	u64				addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	u32				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	u32				advice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) struct io_epoll {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	int				epfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	int				op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	int				fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 	struct epoll_event		event;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) struct io_splice {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	struct file			*file_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	struct file			*file_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	loff_t				off_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 	loff_t				off_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 	u64				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	unsigned int			flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) struct io_provide_buf {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 	__u64				addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 	__u32				len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 	__u32				bgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	__u16				nbufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 	__u16				bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) struct io_statx {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	int				dfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	unsigned int			mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	unsigned int			flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	const char __user		*filename;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 	struct statx __user		*buffer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) struct io_completion {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	struct file			*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	struct list_head		list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	u32				cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) struct io_async_connect {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	struct sockaddr_storage		address;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) struct io_async_msghdr {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	struct iovec			fast_iov[UIO_FASTIOV];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	struct iovec			*iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 	struct sockaddr __user		*uaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	struct msghdr			msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	struct sockaddr_storage		addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) struct io_async_rw {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	struct iovec			fast_iov[UIO_FASTIOV];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	const struct iovec		*free_iovec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 	struct iov_iter			iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 	size_t				bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	struct wait_page_queue		wpq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	REQ_F_LINK_BIT		= IOSQE_IO_LINK_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	REQ_F_HARDLINK_BIT	= IOSQE_IO_HARDLINK_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	REQ_F_LINK_HEAD_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 	REQ_F_FAIL_LINK_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	REQ_F_INFLIGHT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 	REQ_F_CUR_POS_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	REQ_F_NOWAIT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	REQ_F_LINK_TIMEOUT_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	REQ_F_ISREG_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 	REQ_F_NEED_CLEANUP_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	REQ_F_POLLED_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	REQ_F_BUFFER_SELECTED_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 	REQ_F_NO_FILE_TABLE_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 	REQ_F_WORK_INITIALIZED_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	REQ_F_LTIMEOUT_ACTIVE_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 	/* not a real bit, just to check we're not overflowing the space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	__REQ_F_LAST_BIT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	/* ctx owns file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	REQ_F_FIXED_FILE	= BIT(REQ_F_FIXED_FILE_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	/* drain existing IO first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 	REQ_F_IO_DRAIN		= BIT(REQ_F_IO_DRAIN_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 	/* linked sqes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	REQ_F_LINK		= BIT(REQ_F_LINK_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 	/* doesn't sever on completion < 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	REQ_F_HARDLINK		= BIT(REQ_F_HARDLINK_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 	/* IOSQE_ASYNC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	REQ_F_FORCE_ASYNC	= BIT(REQ_F_FORCE_ASYNC_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 	/* IOSQE_BUFFER_SELECT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	REQ_F_BUFFER_SELECT	= BIT(REQ_F_BUFFER_SELECT_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	/* head of a link */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	REQ_F_LINK_HEAD		= BIT(REQ_F_LINK_HEAD_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 	/* fail rest of links */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	REQ_F_FAIL_LINK		= BIT(REQ_F_FAIL_LINK_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 	/* on inflight list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	/* read/write uses file position */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	/* must not punt to workers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	/* has or had linked timeout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	/* regular file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	/* needs cleanup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	/* already went through poll handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	REQ_F_POLLED		= BIT(REQ_F_POLLED_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	/* buffer already selected */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	REQ_F_BUFFER_SELECTED	= BIT(REQ_F_BUFFER_SELECTED_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 	/* doesn't need file table for this request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	REQ_F_NO_FILE_TABLE	= BIT(REQ_F_NO_FILE_TABLE_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 	/* io_wq_work is initialized */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	/* linked timeout is active, i.e. prepared by link's head */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	REQ_F_LTIMEOUT_ACTIVE	= BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) struct async_poll {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	struct io_poll_iocb	poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	struct io_poll_iocb	*double_poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646)  * NOTE! Each of the iocb union members has the file pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647)  * as the first entry in their struct definition. So you can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648)  * access the file pointer through any of the sub-structs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649)  * or directly as just 'ki_filp' in this struct.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) struct io_kiocb {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 		struct file		*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 		struct io_rw		rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 		struct io_poll_iocb	poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 		struct io_accept	accept;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 		struct io_sync		sync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 		struct io_cancel	cancel;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 		struct io_timeout	timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 		struct io_timeout_rem	timeout_rem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 		struct io_connect	connect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 		struct io_sr_msg	sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 		struct io_open		open;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 		struct io_close		close;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 		struct io_files_update	files_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 		struct io_fadvise	fadvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 		struct io_madvise	madvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 		struct io_epoll		epoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 		struct io_splice	splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		struct io_provide_buf	pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 		struct io_statx		statx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 		/* use only after cleaning per-op data, see io_clean_op() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 		struct io_completion	compl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	/* opcode allocated if it needs to store data for async defer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	void				*async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 	u8				opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	/* polled IO has completed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	u8				iopoll_completed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	u16				buf_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 	u32				result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	struct io_ring_ctx		*ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	unsigned int			flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 	refcount_t			refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	struct task_struct		*task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	u64				user_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	struct list_head		link_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 	 * 1. used with ctx->iopoll_list with reads/writes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	 * 2. to track reqs with ->files (see io_op_def::file_table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	struct list_head		inflight_entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 	struct percpu_ref		*fixed_file_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	struct callback_head		task_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	struct hlist_node		hash_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	struct async_poll		*apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 	struct io_wq_work		work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) struct io_defer_entry {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	struct list_head	list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	struct io_kiocb		*req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	u32			seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) #define IO_IOPOLL_BATCH			8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) struct io_comp_state {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	unsigned int		nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	struct list_head	list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	struct io_ring_ctx	*ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) struct io_submit_state {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	struct blk_plug		plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	 * io_kiocb alloc cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	void			*reqs[IO_IOPOLL_BATCH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 	unsigned int		free_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	 * Batch completion logic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	struct io_comp_state	comp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	 * File reference cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	struct file		*file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	unsigned int		fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	unsigned int		has_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	unsigned int		ios_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) struct io_op_def {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	/* needs req->file assigned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	unsigned		needs_file : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	/* don't fail if file grab fails */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	unsigned		needs_file_no_error : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	/* hash wq insertion if file is a regular file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	unsigned		hash_reg_file : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	/* unbound wq insertion if file is a non-regular file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	unsigned		unbound_nonreg_file : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	/* opcode is not supported by this kernel */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	unsigned		not_supported : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	/* set if opcode supports polled "wait" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	unsigned		pollin : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	unsigned		pollout : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	/* op supports buffer selection */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	unsigned		buffer_select : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	/* must always have async data allocated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	unsigned		needs_async_data : 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	/* size of async data needed, if any */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	unsigned short		async_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	unsigned		work_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) static const struct io_op_def io_op_defs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	[IORING_OP_NOP] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	[IORING_OP_READV] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 		.buffer_select		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	[IORING_OP_WRITEV] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		.hash_reg_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 						IO_WQ_WORK_FSIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	[IORING_OP_FSYNC] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		.work_flags		= IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	[IORING_OP_READ_FIXED] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	[IORING_OP_WRITE_FIXED] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 		.hash_reg_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 						IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	[IORING_OP_POLL_ADD] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	[IORING_OP_POLL_REMOVE] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	[IORING_OP_SYNC_FILE_RANGE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		.work_flags		= IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	[IORING_OP_SENDMSG] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		.async_size		= sizeof(struct io_async_msghdr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 						IO_WQ_WORK_FS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	[IORING_OP_RECVMSG] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		.buffer_select		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		.async_size		= sizeof(struct io_async_msghdr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 						IO_WQ_WORK_FS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	[IORING_OP_TIMEOUT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		.async_size		= sizeof(struct io_timeout_data),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		.work_flags		= IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	[IORING_OP_TIMEOUT_REMOVE] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	[IORING_OP_ACCEPT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	[IORING_OP_ASYNC_CANCEL] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	[IORING_OP_LINK_TIMEOUT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		.async_size		= sizeof(struct io_timeout_data),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		.work_flags		= IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	[IORING_OP_CONNECT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		.needs_async_data	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		.async_size		= sizeof(struct io_async_connect),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 		.work_flags		= IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	[IORING_OP_FALLOCATE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	[IORING_OP_OPENAT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 						IO_WQ_WORK_FS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	[IORING_OP_CLOSE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		.needs_file_no_error	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	[IORING_OP_FILES_UPDATE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	[IORING_OP_STATX] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 						IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	[IORING_OP_READ] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 		.buffer_select		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	[IORING_OP_WRITE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		.hash_reg_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		.async_size		= sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 						IO_WQ_WORK_FSIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	[IORING_OP_FADVISE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		.work_flags		= IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	[IORING_OP_MADVISE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	[IORING_OP_SEND] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		.pollout		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	[IORING_OP_RECV] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		.pollin			= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		.buffer_select		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	[IORING_OP_OPENAT2] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 						IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	[IORING_OP_EPOLL_CTL] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		.work_flags		= IO_WQ_WORK_FILES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	[IORING_OP_SPLICE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		.hash_reg_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 		.work_flags		= IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	[IORING_OP_PROVIDE_BUFFERS] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	[IORING_OP_REMOVE_BUFFERS] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	[IORING_OP_TEE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 		.needs_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		.hash_reg_file		= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 		.unbound_nonreg_file	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) enum io_mem_account {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	ACCT_LOCKED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	ACCT_PINNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 			struct io_ring_ctx *ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 			     struct io_comp_state *cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) static void io_cqring_fill_event(struct io_kiocb *req, long res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) static void io_put_req(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) static void io_put_req_deferred(struct io_kiocb *req, int nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) static void io_double_put_req(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) static void __io_queue_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) static void io_queue_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) static int __io_sqe_files_update(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 				 struct io_uring_files_update *ip,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 				 unsigned nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) static void __io_clean_op(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) static struct file *io_file_get(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 				struct io_kiocb *req, int fd, bool fixed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) static void io_file_put_work(struct work_struct *work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 			       struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 			       bool needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 			     const struct iovec *fast_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 			     struct iov_iter *iter, bool force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) static void io_req_drop_files(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) static void io_req_task_queue(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) static struct kmem_cache *req_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) static const struct file_operations io_uring_fops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) struct sock *io_uring_get_socket(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		return ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) EXPORT_SYMBOL(io_uring_get_socket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) static inline void io_clean_op(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		__io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static inline bool __io_match_files(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 				    struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	if (req->file && req->file->f_op == &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	return ((req->flags & REQ_F_WORK_INITIALIZED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	        (req->work.flags & IO_WQ_WORK_FILES)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 		req->work.identity->files == files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	bool got = percpu_ref_tryget(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	/* already at zero, wait for ->release() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	if (!got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		wait_for_completion(compl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	percpu_ref_resurrect(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		percpu_ref_put(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) static bool io_match_task(struct io_kiocb *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 			  struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 			  struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	if (task && head->task != task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 		/* in terms of cancelation, always match if req task is dead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		if (head->task->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	if (!files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	if (__io_match_files(head, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	if (head->flags & REQ_F_LINK_HEAD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 		list_for_each_entry(link, &head->link_list, link_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 			if (__io_match_files(link, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 				return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) static void io_sq_thread_drop_mm(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		kthread_unuse_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		current->mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	if (current->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	if (current->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	/* Should never happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	task_lock(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	mm = ctx->sqo_task->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	if (unlikely(!mm || !mmget_not_zero(mm)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	task_unlock(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		kthread_use_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 				   struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	return __io_sq_thread_acquire_mm(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 					 struct cgroup_subsys_state **cur_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	/* puts the old one when swapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	if (*cur_css != ctx->sqo_blkcg_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 		kthread_associate_blkcg(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		*cur_css = ctx->sqo_blkcg_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) static void io_sq_thread_unassociate_blkcg(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	kthread_associate_blkcg(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) static inline void req_set_fail_links(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 		req->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)  * None of these are dereferenced, they are simply used to check if any of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)  * them have changed. If we're under current and check they are still the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)  * same, we're fine to grab references to them for actual out-of-line use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) static void io_init_identity(struct io_identity *id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	id->files = current->files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	id->mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	id->blkcg_css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	id->creds = current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	id->nsproxy = current->nsproxy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	id->fs = current->fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	id->fsize = rlimit(RLIMIT_FSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	id->loginuid = current->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	id->sessionid = current->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	refcount_set(&id->count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) static inline void __io_req_init_async(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	memset(&req->work, 0, sizeof(req->work));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	req->flags |= REQ_F_WORK_INITIALIZED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)  * Note: must call io_req_init_async() for the first time you
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)  * touch any members of io_wq_work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) static inline void io_req_init_async(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	if (req->flags & REQ_F_WORK_INITIALIZED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	__io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	/* Grab a ref if this isn't our static identity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	req->work.identity = tctx->identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	if (tctx->identity != &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		refcount_inc(&req->work.identity->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) static inline bool io_async_submit(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	return ctx->flags & IORING_SETUP_SQPOLL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) static void io_ring_ctx_ref_free(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	complete(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) static inline bool io_is_timeout_noseq(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	return !req->timeout.off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	int hash_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	if (!ctx->fallback_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	 * Use 5 bits less than the max cq entries, that should give us around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	 * 32 entries per hash list if totally full and uniformly spread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	hash_bits = ilog2(p->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	hash_bits -= 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	if (hash_bits <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 		hash_bits = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	ctx->cancel_hash_bits = hash_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 					GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	if (!ctx->cancel_hash)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	__hash_init(ctx->cancel_hash, 1U << hash_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	ctx->flags = p->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	init_waitqueue_head(&ctx->sqo_sq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	INIT_LIST_HEAD(&ctx->sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	init_waitqueue_head(&ctx->cq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	init_completion(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	init_completion(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	mutex_init(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	init_waitqueue_head(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	spin_lock_init(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	INIT_LIST_HEAD(&ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	INIT_LIST_HEAD(&ctx->defer_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	INIT_LIST_HEAD(&ctx->timeout_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	spin_lock_init(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	INIT_LIST_HEAD(&ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	init_llist_head(&ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	return ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	if (ctx->fallback_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 		kmem_cache_free(req_cachep, ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	kfree(ctx->cancel_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) static bool req_need_defer(struct io_kiocb *req, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		return seq != ctx->cached_cq_tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 				+ READ_ONCE(ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) static void __io_commit_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	/* order cqe stores with ring update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	if (req->work.identity == &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	if (refcount_dec_and_test(&req->work.identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		kfree(req->work.identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) static void io_req_clean_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	if (!(req->flags & REQ_F_WORK_INITIALIZED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	req->flags &= ~REQ_F_WORK_INITIALIZED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	if (req->work.flags & IO_WQ_WORK_MM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		mmdrop(req->work.identity->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		req->work.flags &= ~IO_WQ_WORK_MM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	if (req->work.flags & IO_WQ_WORK_BLKCG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 		css_put(req->work.identity->blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 		req->work.flags &= ~IO_WQ_WORK_BLKCG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	if (req->work.flags & IO_WQ_WORK_CREDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		put_cred(req->work.identity->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		req->work.flags &= ~IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	if (req->work.flags & IO_WQ_WORK_FS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		struct fs_struct *fs = req->work.identity->fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		spin_lock(&req->work.identity->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		if (--fs->users)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 			fs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		spin_unlock(&req->work.identity->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		if (fs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 			free_fs_struct(fs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 		req->work.flags &= ~IO_WQ_WORK_FS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	if (req->flags & REQ_F_INFLIGHT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 		io_req_drop_files(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	io_put_identity(req->task->io_uring, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)  * Create a private copy of io_identity, since some fields don't match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)  * the current context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) static bool io_identity_cow(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	const struct cred *creds = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	struct io_identity *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	if (req->work.flags & IO_WQ_WORK_CREDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		creds = req->work.identity->creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	if (unlikely(!id)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		req->work.flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	 * We can safely just re-init the creds we copied  Either the field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	 * matches the current one, or we haven't grabbed it yet. The only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	 * exception is ->creds, through registered personalities, so handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	 * that one separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	io_init_identity(id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	if (creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 		id->creds = creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	/* add one for this request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	refcount_inc(&id->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	/* drop tctx and req identity references, if needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	if (tctx->identity != &tctx->__identity &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	    refcount_dec_and_test(&tctx->identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 		kfree(tctx->identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	if (req->work.identity != &tctx->__identity &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	    refcount_dec_and_test(&req->work.identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 		kfree(req->work.identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	req->work.identity = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	tctx->identity = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) static bool io_grab_identity(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	struct io_identity *id = req->work.identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	if (def->work_flags & IO_WQ_WORK_FSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 		if (id->fsize != rlimit(RLIMIT_FSIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 		req->work.flags |= IO_WQ_WORK_FSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	    (def->work_flags & IO_WQ_WORK_BLKCG)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 		if (id->blkcg_css != blkcg_css()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 		 * This should be rare, either the cgroup is dying or the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		 * is moving cgroups. Just punt to root for the handful of ios.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 		if (css_tryget_online(id->blkcg_css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 			req->work.flags |= IO_WQ_WORK_BLKCG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		if (id->creds != current_cred())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 		get_cred(id->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		req->work.flags |= IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	if (!uid_eq(current->loginuid, id->loginuid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	    current->sessionid != id->sessionid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	if (!(req->work.flags & IO_WQ_WORK_FS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	    (def->work_flags & IO_WQ_WORK_FS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 		if (current->fs != id->fs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		spin_lock(&id->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		if (!id->fs->in_exec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 			id->fs->users++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 			req->work.flags |= IO_WQ_WORK_FS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 			req->work.flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 		spin_unlock(&current->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 	if (!(req->work.flags & IO_WQ_WORK_FILES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	    (def->work_flags & IO_WQ_WORK_FILES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	    !(req->flags & REQ_F_NO_FILE_TABLE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 		if (id->files != current->files ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 		    id->nsproxy != current->nsproxy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 		atomic_inc(&id->files->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		get_nsproxy(id->nsproxy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		if (!(req->flags & REQ_F_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 			req->flags |= REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 			spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 			list_add(&req->inflight_entry, &ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 			spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		req->work.flags |= IO_WQ_WORK_FILES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	if (!(req->work.flags & IO_WQ_WORK_MM) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 	    (def->work_flags & IO_WQ_WORK_MM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		if (id->mm != current->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		mmgrab(id->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		req->work.flags |= IO_WQ_WORK_MM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) static void io_prep_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	struct io_identity *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	id = req->work.identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	if (req->flags & REQ_F_FORCE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 		req->work.flags |= IO_WQ_WORK_CONCURRENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 		if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 			io_wq_hash_work(&req->work, file_inode(req->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 		if (def->unbound_nonreg_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 			req->work.flags |= IO_WQ_WORK_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	/* if we fail grabbing identity, we must COW, regrab, and retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	if (io_grab_identity(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	if (!io_identity_cow(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	/* can't fail at this point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	if (!io_grab_identity(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) static void io_prep_async_link(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	struct io_kiocb *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	io_prep_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 		list_for_each_entry(cur, &req->link_list, link_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 			io_prep_async_work(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	struct io_kiocb *link = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 					&req->work, req->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	io_wq_enqueue(ctx->io_wq, &req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	return link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) static void io_queue_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	/* init ->work of the whole link before punting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	io_prep_async_link(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	link = __io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	if (link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		io_queue_linked_timeout(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) static void io_kill_timeout(struct io_kiocb *req, int status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	struct io_timeout_data *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	if (ret != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 		if (status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 			req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 		atomic_set(&req->ctx->cq_timeouts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 			atomic_read(&req->ctx->cq_timeouts) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 		list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 		io_cqring_fill_event(req, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 		io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524)  * Returns true if we found and killed one or more timeouts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 			     struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 	int canceled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		if (io_match_task(req, tsk, files)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 			io_kill_timeout(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 			canceled++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 	return canceled != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) static void __io_queue_deferred(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 						struct io_defer_entry, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		if (req_need_defer(de->req, de->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		list_del_init(&de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		io_req_task_queue(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	} while (!list_empty(&ctx->defer_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) static void io_flush_timeouts(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 	u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 	if (list_empty(&ctx->timeout_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		u32 events_needed, events_got;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 		if (io_is_timeout_noseq(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 		 * Since seq can easily wrap around over time, subtract
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 		 * the last seq at which timeouts were flushed before comparing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 		 * Assuming not more than 2^31-1 events have happened since,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 		 * these subtractions won't have wrapped, so we can check if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		 * target is in [last_seq, current_seq] by comparing the two.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 		events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 		events_got = seq - ctx->cq_last_tm_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		if (events_got < events_needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 		io_kill_timeout(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	ctx->cq_last_tm_flush = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) static void io_commit_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	io_flush_timeouts(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	__io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	if (unlikely(!list_empty(&ctx->defer_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		__io_queue_deferred(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) static inline bool io_sqring_full(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	struct io_rings *r = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	unsigned tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	tail = ctx->cached_cq_tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	 * writes to the cq entry need to come after reading head; the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	 * control dependency is enough as we're using WRITE_ONCE to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	 * fill the cq entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	ctx->cached_cq_tail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	return &rings->cqes[tail & ctx->cq_mask];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	if (!ctx->cq_ev_fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	if (!ctx->eventfd_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	return io_wq_current_is_worker();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	if (wq_has_sleeper(&ctx->cq_wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 		wake_up_interruptible(&ctx->cq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 		kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	if (waitqueue_active(&ctx->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 		wake_up(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 		wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	if (io_should_trigger_evfd(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		eventfd_signal(ctx->cq_ev_fd, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	if (list_empty(&ctx->cq_overflow_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 		clear_bit(0, &ctx->sq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 		clear_bit(0, &ctx->cq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) /* Returns true if there are no backlogged entries after the flush */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 				       struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 				       struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	struct io_uring_cqe *cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 	LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 	if (!force) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 		if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 		    rings->cq_ring_entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	cqe = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 		if (!io_match_task(req, tsk, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 		cqe = io_get_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 		if (!cqe && !force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 		list_move(&req->compl.list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		if (cqe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 			WRITE_ONCE(cqe->user_data, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 			WRITE_ONCE(cqe->res, req->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			WRITE_ONCE(cqe->flags, req->compl.cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 			ctx->cached_cq_overflow++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 			WRITE_ONCE(ctx->rings->cq_overflow,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 				   ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	io_cqring_mark_overflow(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	while (!list_empty(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		req = list_first_entry(&list, struct io_kiocb, compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 		list_del(&req->compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 		io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 	return cqe != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 				     struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 				     struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	if (test_bit(0, &ctx->cq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 		/* iopoll syncs against uring_lock, not completion_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 		if (ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 			mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		__io_cqring_overflow_flush(ctx, force, tsk, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 		if (ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 			mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) static void __io_cqring_fill_event(struct io_kiocb *req, long res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 				   unsigned int cflags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	struct io_uring_cqe *cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	trace_io_uring_complete(ctx, req->user_data, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	 * If we can't get a cq entry, userspace overflowed the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	 * submission (by quite a lot). Increment the overflow count in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	 * the ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 	cqe = io_get_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 	if (likely(cqe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 		WRITE_ONCE(cqe->user_data, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		WRITE_ONCE(cqe->res, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 		WRITE_ONCE(cqe->flags, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	} else if (ctx->cq_overflow_flushed ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		   atomic_read(&req->task->io_uring->in_idle)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		 * If we're in ring overflow flush mode, or in task cancel mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		 * then we cannot store the request for later flushing, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 		 * to drop it on the floor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 		ctx->cached_cq_overflow++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 		WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		if (list_empty(&ctx->cq_overflow_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 			set_bit(0, &ctx->sq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 			set_bit(0, &ctx->cq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 		io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 		req->result = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		req->compl.cflags = cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 		refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) static void io_cqring_fill_event(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 	__io_cqring_fill_event(req, res, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	__io_cqring_fill_event(req, res, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) static void io_submit_flush_completions(struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	struct io_ring_ctx *ctx = cs->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	while (!list_empty(&cs->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 		struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 		req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 		list_del(&req->compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 		__io_cqring_fill_event(req, req->result, req->compl.cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		 * io_free_req() doesn't care about completion_lock unless one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 		 * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 		 * because of a potential deadlock with req->work.fs->lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 		if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 				 |REQ_F_WORK_INITIALIZED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 			spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 			io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 			spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 			io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	cs->nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 			      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	if (!cs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 		io_cqring_add_event(req, res, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 		io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 		io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 		req->result = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 		req->compl.cflags = cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 		list_add_tail(&req->compl.list, &cs->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 		if (++cs->nr >= 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 			io_submit_flush_completions(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) static void io_req_complete(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	__io_req_complete(req, res, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static inline bool io_is_fallback_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	return req == (struct io_kiocb *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 			((unsigned long) req->ctx->fallback_req & ~1UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	req = ctx->fallback_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 				     struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	if (!state->free_reqs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		size_t sz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		 * retry single alloc to be on the safe side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 		if (unlikely(ret <= 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 			if (!state->reqs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 				goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 			ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		state->free_reqs = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	state->free_reqs--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	return state->reqs[state->free_reqs];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) fallback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	return io_get_fallback_req(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) static inline void io_put_file(struct io_kiocb *req, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 			  bool fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 	if (fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 		percpu_ref_put(req->fixed_file_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 		fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) static void io_dismantle_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 	io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	if (req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		kfree(req->async_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	if (req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 		io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	io_req_clean_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) static void __io_free_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 	struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 	io_dismantle_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 	percpu_counter_dec(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 	if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 		wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 	put_task_struct(req->task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	if (likely(!io_is_fallback_req(req)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 		kmem_cache_free(req_cachep, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		clear_bit_unlock(0, (unsigned long *) &ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) static void io_kill_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 	struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	bool cancelled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 					link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	 * Can happen if a linked timeout fired and link had been like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 	 * req -> link t-out -> link t-out [-> ...]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 		struct io_timeout_data *io = link->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 		list_del_init(&link->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 		ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		if (ret != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 			io_cqring_fill_event(link, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 			io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 			cancelled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	req->flags &= ~REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	if (cancelled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 		io_put_req(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 	struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	 * The list should never be empty when we are called here. But could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 	 * potentially happen if the chain is messed up, check to be on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	 * safe side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	if (unlikely(list_empty(&req->link_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	list_del_init(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 	if (!list_empty(&nxt->link_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 		nxt->flags |= REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)  * Called if REQ_F_LINK_HEAD is set, and we fail the head request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) static void io_fail_links(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	while (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 		struct io_kiocb *link = list_first_entry(&req->link_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 						struct io_kiocb, link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 		list_del_init(&link->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 		trace_io_uring_fail_link(req, link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 		io_cqring_fill_event(link, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 		 * It's ok to free under spinlock as they're not linked anymore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 		 * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 		 * work.fs->lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 		if (link->flags & REQ_F_WORK_INITIALIZED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 			io_put_req_deferred(link, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 			io_double_put_req(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	req->flags &= ~REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 	if (req->flags & REQ_F_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 		io_kill_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 	 * If LINK is set, we have dependent requests in this chain. If we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 	 * didn't fail this request, queue the first one up, moving any other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 	 * dependencies to the next request. In case of failure, fail the rest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 	 * of the chain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 	if (likely(!(req->flags & REQ_F_FAIL_LINK)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		return io_req_link_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	io_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 	if (likely(!(req->flags & REQ_F_LINK_HEAD)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	return __io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 	struct task_struct *tsk = req->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	enum task_work_notify_mode notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	if (tsk->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 		return -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	 * SQPOLL kernel thread doesn't need notification, just a wakeup. For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	 * all other cases, use TWA_SIGNAL unconditionally to ensure we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 	 * processing task_work. There's no reliable way to tell if TWA_RESUME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	 * will do the job.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	notify = TWA_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 		notify = TWA_SIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	ret = task_work_add(tsk, &req->task_work, notify);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 		wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) static void __io_req_task_cancel(struct io_kiocb *req, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	io_cqring_fill_event(req, error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	io_double_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) static void io_req_task_cancel(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	__io_req_task_cancel(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) static void __io_req_task_submit(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	if (!ctx->sqo_dead && !__io_sq_thread_acquire_mm(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 		__io_queue_sqe(req, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 		__io_req_task_cancel(req, -EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 	if (ctx->flags & IORING_SETUP_SQPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 		io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) static void io_req_task_submit(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 	__io_req_task_submit(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) static void io_req_task_queue(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	init_task_work(&req->task_work, io_req_task_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	ret = io_req_task_work_add(req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 		struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 		init_task_work(&req->task_work, io_req_task_cancel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 		tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 		task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 		wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) static void io_queue_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	struct io_kiocb *nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	if (nxt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 		io_req_task_queue(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) static void io_free_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	io_queue_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	__io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) struct req_batch {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	void *reqs[IO_IOPOLL_BATCH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	int to_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 	struct task_struct	*task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 	int			task_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) static inline void io_init_req_batch(struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 	rb->to_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 	rb->task_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 	rb->task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) static void __io_req_free_batch_flush(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 				      struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	percpu_ref_put_many(&ctx->refs, rb->to_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	rb->to_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 				     struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	if (rb->to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 		__io_req_free_batch_flush(ctx, rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	if (rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 		struct io_uring_task *tctx = rb->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 		percpu_counter_sub(&tctx->inflight, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 		if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 			wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 		put_task_struct_many(rb->task, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 		rb->task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 	if (unlikely(io_is_fallback_req(req))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 		io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 		io_queue_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 	if (req->task != rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		if (rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 			struct io_uring_task *tctx = rb->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 			percpu_counter_sub(&tctx->inflight, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 			if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 				wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 			put_task_struct_many(rb->task, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 		rb->task = req->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 		rb->task_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 	rb->task_refs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 	io_dismantle_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	rb->reqs[rb->to_free++] = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 		__io_req_free_batch_flush(req->ctx, rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)  * Drop reference to request, return next in chain (if there is one) if this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)  * was the last reference to this request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 	struct io_kiocb *nxt = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	if (refcount_dec_and_test(&req->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 		nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 		__io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) static void io_put_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	if (refcount_dec_and_test(&req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 		io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) static void io_put_req_deferred_cb(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 	io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) static void io_free_req_deferred(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	init_task_work(&req->task_work, io_put_req_deferred_cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 	ret = io_req_task_work_add(req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 		struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 		tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 		task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 		wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	if (refcount_sub_and_test(refs, &req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 		io_free_req_deferred(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) static struct io_wq_work *io_steal_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	 * A ref is owned by io-wq in which context we're. So, if that's the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 	 * last one, it's safe to steal next work. False negatives are Ok,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	 * it just will be re-punted async in io_put_work()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 	if (refcount_read(&req->refs) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 	return nxt ? &nxt->work : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) static void io_double_put_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 	/* drop both submit and complete references */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	if (refcount_sub_and_test(2, &req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 		io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) static unsigned io_cqring_events(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 	/* See comment at the top of this file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 	return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	/* make sure SQ entry isn't read before tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	unsigned int cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	cflags |= IORING_CQE_F_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	req->flags &= ~REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	kfree(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	return cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 	struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 	return io_put_kbuf(req, kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) static inline bool io_run_task_work(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	 * Not safe to run on exiting task, and the task_work handling will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	 * not add work to such a task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	if (unlikely(current->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	if (current->task_works) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 		__set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 		task_work_run();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static void io_iopoll_queue(struct list_head *again)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 		req = list_first_entry(again, struct io_kiocb, inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 		list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 		__io_complete_rw(req, -EAGAIN, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 	} while (!list_empty(again));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)  * Find and free completed poll iocbs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 			       struct list_head *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	struct req_batch rb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 	LIST_HEAD(again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	/* order with ->result store in io_complete_rw_iopoll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	io_init_req_batch(&rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	while (!list_empty(done)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 		int cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 		req = list_first_entry(done, struct io_kiocb, inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 		if (READ_ONCE(req->result) == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 			req->result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 			req->iopoll_completed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 			list_move_tail(&req->inflight_entry, &again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 		list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 		if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 			cflags = io_put_rw_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 		__io_cqring_fill_event(req, req->result, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 		(*nr_events)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 		if (refcount_dec_and_test(&req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 			io_req_free_batch(&rb, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 	if (ctx->flags & IORING_SETUP_SQPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 		io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 	io_req_free_batch_finish(ctx, &rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 	if (!list_empty(&again))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 		io_iopoll_queue(&again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 			long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 	struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 	LIST_HEAD(done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 	bool spin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 	 * Only spin for completions if we don't have multiple devices hanging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 	 * off our complete list, and we're under the requested amount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 	spin = !ctx->poll_multi_file && *nr_events < min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 		struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 		 * Move completed and retryable entries to our local lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 		 * If we find a request that requires polling, break out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 		 * and complete those lists first, if we have entries there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 		if (READ_ONCE(req->iopoll_completed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 			list_move_tail(&req->inflight_entry, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 		if (!list_empty(&done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 		ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 		/* iopoll may have completed current req */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 		if (READ_ONCE(req->iopoll_completed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 			list_move_tail(&req->inflight_entry, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 		if (ret && spin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 			spin = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 	if (!list_empty(&done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 		io_iopoll_complete(ctx, nr_events, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)  * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450)  * non-spinning poll check - we'll still enter the driver poll loop, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)  * as a non-spinning completion check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 				long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 		ret = io_do_iopoll(ctx, nr_events, min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 		if (*nr_events >= min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470)  * We can't just wait for polled events to come to us, we have to actively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)  * find and complete them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 	if (!(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	while (!list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 		unsigned int nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 		io_do_iopoll(ctx, &nr_events, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 		/* let it sleep and repeat later if can't complete a request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 		if (nr_events == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 		 * Ensure we allow local-to-the-cpu processing to take place,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 		 * in this case we need to ensure that we reap all events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 		 * Also let task_work, etc. to progress by releasing the mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 		if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 			mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 			mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 	unsigned int nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	int iters = 0, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 	 * We disallow the app entering submit/complete with polling, but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 	 * still need to lock the ring to prevent racing with polled issue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	 * that got punted to a workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 		 * Don't enter poll loop if we already have events pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 		 * If we do, we can potentially be spinning for commands that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 		 * already triggered a CQE (eg in error).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 		if (test_bit(0, &ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 			__io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 		if (io_cqring_events(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 		 * If a submit got punted to a workqueue, we can have the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 		 * application entering polling for a command before it gets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 		 * issued. That app will hold the uring_lock for the duration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 		 * of the poll right here, so we need to take a breather every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 		 * now and then to ensure that the issue has a chance to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 		 * the poll to the issued list. Otherwise we can spin here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 		 * forever, while the workqueue is stuck trying to acquire the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 		 * very same mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 		if (!(++iters & 7)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 			mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 			io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 			mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		ret = io_iopoll_getevents(ctx, &nr_events, min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 		if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	} while (min && !nr_events && !need_resched());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) static void kiocb_end_write(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 	 * Tell lockdep we inherited freeze protection from submission
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	 * thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 	if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 		struct inode *inode = file_inode(req->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 	file_end_write(req->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void io_complete_rw_common(struct kiocb *kiocb, long res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 				  struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 	int cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 	if (kiocb->ki_flags & IOCB_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 		kiocb_end_write(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	if (res != req->result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 	if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 		cflags = io_put_rw_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 	__io_req_complete(req, res, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) static bool io_resubmit_prep(struct io_kiocb *req, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 	ssize_t ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	struct iov_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 	int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 	if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 		ret = error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 		goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 	switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 	case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 	case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 		rw = READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 	case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 	case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 	case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 		rw = WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 		printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 				req->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 		goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 	if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 		ret = io_import_iovec(rw, req, &iovec, &iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 			goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 		ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 		kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) end_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 	req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) static bool io_rw_reissue(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 	umode_t mode = file_inode(req->file)->i_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 	if (!S_ISBLK(mode) && !S_ISREG(mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 	if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 	 * If ref is dying, we might be running poll reap from the exit work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 	 * Don't attempt to reissue from that path, just let it fail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 	 * -EAGAIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 	if (percpu_ref_is_dying(&req->ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 	ret = io_sq_thread_acquire_mm(req->ctx, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 	if (io_resubmit_prep(req, ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 		refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 			     struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 	if (!io_rw_reissue(req, res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 		io_complete_rw_common(&req->rw.kiocb, res, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	__io_complete_rw(req, res, res2, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	if (kiocb->ki_flags & IOCB_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 		kiocb_end_write(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	if (res != -EAGAIN && res != req->result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 	WRITE_ONCE(req->result, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	/* order with io_poll_complete() checking ->result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 	WRITE_ONCE(req->iopoll_completed, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687)  * After the iocb has been issued, it's safe to be found on the poll list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688)  * Adding the kiocb to the list AFTER submission ensures that we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)  * find it from a io_iopoll_getevents() thread before the issuer is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690)  * accessing the kiocb cookie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) static void io_iopoll_req_issued(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 	 * Track whether we have multiple files in our lists. This will impact
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 	 * how we do polling eventually, not spinning if we're on potentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 	 * different devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 	if (list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 		ctx->poll_multi_file = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 	} else if (!ctx->poll_multi_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 		struct io_kiocb *list_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 		list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 						inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 		if (list_req->file != req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 			ctx->poll_multi_file = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 	 * For fast devices, IO may have already completed. If it has, add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 	 * it to the front so we find it first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 	if (READ_ONCE(req->iopoll_completed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 		list_add(&req->inflight_entry, &ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 		list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 	if ((ctx->flags & IORING_SETUP_SQPOLL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 	    wq_has_sleeper(&ctx->sq_data->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 		wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) static void __io_state_file_put(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 	if (state->has_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 		fput_many(state->file, state->has_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 	state->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) static inline void io_state_file_put(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 	if (state->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 		__io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740)  * Get as many references to a file as we have IOs left in this submission,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741)  * assuming most submissions are for one file, or at least that each file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742)  * has more than one submission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) static struct file *__io_file_get(struct io_submit_state *state, int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 	if (!state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 		return fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 	if (state->file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 		if (state->fd == fd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 			state->has_refs--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 			return state->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 		__io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 	state->file = fget_many(fd, state->ios_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 	if (!state->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 	state->fd = fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 	state->has_refs = state->ios_left - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 	return state->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) static bool io_bdev_nowait(struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 	return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775)  * If we tracked the file through the SCM inflight mechanism, we could support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776)  * any file. For now, just ensure that anything potentially problematic is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777)  * inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) static bool io_file_supports_async(struct file *file, int rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 	umode_t mode = file_inode(file)->i_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 	if (S_ISBLK(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 		if (io_bdev_nowait(file->f_inode->i_bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 	if (S_ISSOCK(mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 	if (S_ISREG(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 		if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 		    file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 	/* any ->read/write should understand O_NONBLOCK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 	if (file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 	if (!(file->f_mode & FMODE_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	if (rw == READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 		return file->f_op->read_iter != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 	return file->f_op->write_iter != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 	struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 	unsigned ioprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 	if (S_ISREG(file_inode(req->file)->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 		req->flags |= REQ_F_ISREG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 	kiocb->ki_pos = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 	if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 		req->flags |= REQ_F_CUR_POS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 		kiocb->ki_pos = req->file->f_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 	ioprio = READ_ONCE(sqe->ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 	if (ioprio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 		ret = ioprio_check_cap(ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 		kiocb->ki_ioprio = ioprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 		kiocb->ki_ioprio = get_current_ioprio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 	/* don't allow async punt if RWF_NOWAIT was requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	if (kiocb->ki_flags & IOCB_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 	if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 		if (!(kiocb->ki_flags & IOCB_DIRECT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 		    !kiocb->ki_filp->f_op->iopoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 			return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 		kiocb->ki_flags |= IOCB_HIPRI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 		kiocb->ki_complete = io_complete_rw_iopoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 		req->iopoll_completed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 		if (kiocb->ki_flags & IOCB_HIPRI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 		kiocb->ki_complete = io_complete_rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 	req->rw.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 	req->rw.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 	req->buf_index = READ_ONCE(sqe->buf_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 	switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 	case -EIOCBQUEUED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 	case -ERESTARTSYS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 	case -ERESTARTNOINTR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 	case -ERESTARTNOHAND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 	case -ERESTART_RESTARTBLOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 		 * We can't just restart the syscall, since previously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 		 * submitted sqes may already be in progress. Just fail this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 		 * IO with EINTR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 		kiocb->ki_complete(kiocb, ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 		       struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 	struct io_async_rw *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 	/* add previously done IO, if any */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 	if (io && io->bytes_done > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 			ret = io->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 			ret += io->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 	if (req->flags & REQ_F_CUR_POS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 		req->file->f_pos = kiocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 		__io_complete_rw(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 		io_rw_done(kiocb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 			       struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 	size_t len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 	struct io_mapped_ubuf *imu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 	u16 index, buf_index = req->buf_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 	size_t offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 	u64 buf_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 	if (unlikely(buf_index >= ctx->nr_user_bufs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 	index = array_index_nospec(buf_index, ctx->nr_user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 	imu = &ctx->user_bufs[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 	buf_addr = req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 	/* overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 	if (buf_addr + len < buf_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 	/* not inside the mapped region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 	if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 	 * May not be a start of buffer, set size appropriately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	 * and advance us to the beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 	offset = buf_addr - imu->ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 	if (offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 		 * Don't use iov_iter_advance() here, as it's really slow for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 		 * using the latter parts of a big fixed buffer - it iterates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 		 * over each segment manually. We can cheat a bit here, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 		 * we know that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 		 * 1) it's a BVEC iter, we set it up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 		 * 2) all bvecs are PAGE_SIZE in size, except potentially the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 		 *    first and last bvec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 		 * So just find our index, and adjust the iterator afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 		 * If the offset is within the first bvec (or the whole first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 		 * bvec, just use iov_iter_advance(). This makes it easier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 		 * since we can just skip the first segment, which may not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 		 * be PAGE_SIZE aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 		const struct bio_vec *bvec = imu->bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 		if (offset <= bvec->bv_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 			iov_iter_advance(iter, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) 			unsigned long seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 			/* skip first vec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 			offset -= bvec->bv_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 			seg_skip = 1 + (offset >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 			iter->bvec = bvec + seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 			iter->nr_segs -= seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 			iter->count -= bvec->bv_len + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) 			iter->iov_offset = offset & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 	return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 	if (needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 		mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 	 * "Normal" inline submissions always hold the uring_lock, since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 	 * grab it from the system call. Same is true for the SQPOLL offload.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 	 * The only exception is when we've detached the request and issue it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) 	 * from an async worker thread, grab the lock for that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 	if (needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 		mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 					  int bgid, struct io_buffer *kbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 					  bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 	struct io_buffer *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 	if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 		return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 	io_ring_submit_lock(req->ctx, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 	lockdep_assert_held(&req->ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 	head = xa_load(&req->ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 	if (head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 		if (!list_empty(&head->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 			kbuf = list_last_entry(&head->list, struct io_buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 							list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 			list_del(&kbuf->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 			kbuf = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) 			xa_erase(&req->ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 		if (*len > kbuf->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 			*len = kbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 		kbuf = ERR_PTR(-ENOBUFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 	io_ring_submit_unlock(req->ctx, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 	return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 					bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 	struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 	u16 bgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 	kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 	bgid = req->buf_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 	kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 	if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 		return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 	req->rw.addr = (u64) (unsigned long) kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 	req->flags |= REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 	return u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) 				bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) 	struct compat_iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) 	compat_ssize_t clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) 	void __user *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) 	ssize_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 	uiov = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) 	if (!access_ok(uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 	if (__get_user(clen, &uiov->iov_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 	if (clen < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 	buf = io_rw_buffer_select(req, &len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 	if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) 		return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) 	iov[0].iov_base = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 	iov[0].iov_len = (compat_size_t) len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 				      bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) 	struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 	void __user *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) 	ssize_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) 	if (copy_from_user(iov, uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) 	len = iov[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) 	if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) 	buf = io_rw_buffer_select(req, &len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) 	if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) 		return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) 	iov[0].iov_base = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) 	iov[0].iov_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 				    bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 	if (req->flags & REQ_F_BUFFER_SELECTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) 		struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) 		kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) 		iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) 		iov[0].iov_len = kbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) 	if (req->rw.len != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) 	if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) 		return io_compat_import(req, iov, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) 	return __io_iov_buffer_select(req, iov, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) 				 struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) 				 bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) 	void __user *buf = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) 	size_t sqe_len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) 	u8 opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) 	opcode = req->opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) 	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) 		*iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) 		return io_import_fixed(req, rw, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) 	/* buffer index only valid with fixed read/write, or buffer select  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) 	if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) 	if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) 		if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) 			buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) 			if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) 				return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) 			req->rw.len = sqe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) 		ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) 		*iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) 	if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) 		ret = io_iov_buffer_select(req, *iovec, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) 		if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) 			ret = (*iovec)->iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) 			iov_iter_init(iter, rw, *iovec, 1, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) 		*iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) 	return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) 			      req->ctx->compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) 			       struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) 			       bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) 	struct io_async_rw *iorw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) 	if (!iorw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) 		return __io_import_iovec(rw, req, iovec, iter, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) 	*iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) 	return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178)  * For files that don't have ->read_iter() and ->write_iter(), handle them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179)  * by looping over ->read() or ->write() manually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) 	struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) 	struct file *file = req->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) 	ssize_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) 	 * Don't support polled IO through this interface, and we can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) 	 * support non-blocking either. For the latter, this just causes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) 	 * the kiocb to be handled from an async context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) 	if (kiocb->ki_flags & IOCB_HIPRI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) 	if (kiocb->ki_flags & IOCB_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) 	while (iov_iter_count(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) 		struct iovec iovec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) 		ssize_t nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) 		if (!iov_iter_is_bvec(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) 			iovec = iov_iter_iovec(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) 			iovec.iov_base = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) 			iovec.iov_len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) 		if (rw == READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) 			nr = file->f_op->read(file, iovec.iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) 					      iovec.iov_len, io_kiocb_ppos(kiocb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) 			nr = file->f_op->write(file, iovec.iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) 					       iovec.iov_len, io_kiocb_ppos(kiocb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) 		if (nr < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) 			if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) 				ret = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) 		ret += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) 		if (!iov_iter_is_bvec(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) 			iov_iter_advance(iter, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 			req->rw.addr += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 			req->rw.len -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) 			if (!req->rw.len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) 		if (nr != iovec.iov_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) 			  const struct iovec *fast_iov, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) 	struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) 	memcpy(&rw->iter, iter, sizeof(*iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) 	rw->free_iovec = iovec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) 	rw->bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) 	/* can only be fixed buffers, no need to do anything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) 	if (iov_iter_is_bvec(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) 	if (!iovec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) 		unsigned iov_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) 		rw->iter.iov = rw->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) 		if (iter->iov != fast_iov) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) 			iov_off = iter->iov - fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) 			rw->iter.iov += iov_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) 		if (rw->fast_iov != fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) 			memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) 			       sizeof(struct iovec) * iter->nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) 		req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) static inline int __io_alloc_async_data(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) 	WARN_ON_ONCE(!io_op_defs[req->opcode].async_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) 	req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) 	return req->async_data == NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) static int io_alloc_async_data(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) 	if (!io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) 	return  __io_alloc_async_data(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) 			     const struct iovec *fast_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) 			     struct iov_iter *iter, bool force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) 	if (!force && !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) 	if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) 		if (__io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) 		io_req_map_rw(req, iovec, fast_iov, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) 	struct io_async_rw *iorw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) 	struct iovec *iov = iorw->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) 	ret = __io_import_iovec(rw, req, &iov, &iorw->iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) 	if (unlikely(ret < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) 	iorw->bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) 	iorw->free_iovec = iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) 	if (iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) 		req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) 	ret = io_prep_rw(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) 	if (unlikely(!(req->file->f_mode & FMODE_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) 	/* either don't need iovec imported or already have it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) 	if (!req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) 	return io_rw_prep_async(req, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329)  * This is our waitqueue callback handler, registered through lock_page_async()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330)  * when we initially tried to do the IO with the iocb armed our waitqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331)  * This gets called when the page is unlocked, and we generally expect that to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332)  * happen when the page IO is completed and the page is now uptodate. This will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)  * queue a task_work based retry of the operation, attempting to copy the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334)  * again. If the latter fails because the page was NOT uptodate, then we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335)  * do a thread based blocking retry of the operation. That's the unexpected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)  * slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) 			     int sync, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) 	struct wait_page_queue *wpq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) 	struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) 	struct wait_page_key *key = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) 	wpq = container_of(wait, struct wait_page_queue, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) 	if (!wake_page_match(wpq, key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) 	req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) 	list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) 	init_task_work(&req->task_work, io_req_task_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) 	percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) 	/* submit ref gets dropped, acquire a new one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 	refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) 	ret = io_req_task_work_add(req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 	if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) 		struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) 		/* queue just for cancelation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) 		init_task_work(&req->task_work, io_req_task_cancel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) 		tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) 		task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) 		wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373)  * This controls whether a given IO request should be armed for async page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374)  * based retry. If we return false here, the request is handed to the async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375)  * worker threads for retry. If we're doing buffered reads on a regular file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376)  * we prepare a private wait_page_queue entry and retry the operation. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377)  * will either succeed because the page is now uptodate and unlocked, or it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378)  * will register a callback when the page is unlocked at IO completion. Through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)  * that callback, io_uring uses task_work to setup a retry of the operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380)  * That retry will attempt the buffered read again. The retry will generally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381)  * succeed, or in rare cases where it fails, we then fall back to using the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382)  * async worker threads for a blocking retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) static bool io_rw_should_retry(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) 	struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) 	struct wait_page_queue *wait = &rw->wpq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) 	struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) 	/* never retry for NOWAIT, we just complete with -EAGAIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) 	if (req->flags & REQ_F_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) 	/* Only for buffered IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) 	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) 	 * just use poll if we can, and don't attempt if the fs doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) 	 * support callback based unlocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) 	wait->wait.func = io_async_buf_func;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) 	wait->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) 	wait->wait.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) 	INIT_LIST_HEAD(&wait->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) 	kiocb->ki_flags |= IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) 	kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) 	kiocb->ki_waitq = wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) 	if (req->file->f_op->read_iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) 		return call_read_iter(req->file, &req->rw.kiocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) 	else if (req->file->f_op->read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) 		return loop_rw_iter(READ, req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) static int io_read(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) 		   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) 	struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) 	struct iov_iter __iter, *iter = &__iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) 	struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) 	ssize_t io_size, ret, ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) 	bool no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) 	if (rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) 		iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) 	ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) 	io_size = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 	req->result = io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) 	/* Ensure we clear previously set non-block flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) 	if (!force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) 		kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) 		kiocb->ki_flags |= IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) 	/* If the file doesn't support async, just async punt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) 	no_async = force_nonblock && !io_file_supports_async(req->file, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) 	if (no_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) 		goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) 	ret = io_iter_do_read(req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) 	} else if (ret == -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) 	} else if (ret == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) 		/* IOPOLL retry should happen for io-wq threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) 		if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) 		/* no retry on NONBLOCK marked file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) 		if (req->file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) 		/* some cases will consume bytes even on error returns */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) 		iov_iter_revert(iter, io_size - iov_iter_count(iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) 		goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) 	} else if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) 		/* make sure -ERESTARTSYS -> -EINTR is done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) 	/* read it all, or we did blocking attempt. no retry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) 	if (!iov_iter_count(iter) || !force_nonblock ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) 	    (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) 	io_size -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) copy_iov:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) 	ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) 	if (ret2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) 		ret = ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) 	if (no_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) 	rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) 	/* it's copied and will be cleaned with ->io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) 	iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) 	/* now use our persistent iterator, if we aren't already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) 	iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) 	rw->bytes_done += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) 	/* if we can retry, do so with the callbacks armed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) 	if (!io_rw_should_retry(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) 		kiocb->ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) 	 * Now retry read with the IOCB_WAITQ parts set in the iocb. If we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) 	 * get -EIOCBQUEUED, then we'll get a notification when the desired
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) 	 * page gets unlocked. We can also get a partial read here, and if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) 	 * do, then just retry at the new offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) 	ret = io_iter_do_read(req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) 	if (ret == -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) 	} else if (ret > 0 && ret < io_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) 		/* we got some bytes, but not all. retry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) 		kiocb->ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) 	kiocb_done(kiocb, ret, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) 	/* it's reportedly faster than delegating the null check to kfree() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) 	if (iovec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) 		kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) 	ret = io_prep_rw(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) 	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) 	/* either don't need iovec imported or already have it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) 	if (!req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) 	return io_rw_prep_async(req, WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) static int io_write(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) 		    struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) 	struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) 	struct iov_iter __iter, *iter = &__iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) 	struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) 	ssize_t ret, ret2, io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) 	if (rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) 		iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) 	ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) 	io_size = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) 	req->result = io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) 	/* Ensure we clear previously set non-block flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) 	if (!force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) 		kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) 		kiocb->ki_flags |= IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) 	/* If the file doesn't support async, just async punt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) 	if (force_nonblock && !io_file_supports_async(req->file, WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) 		goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) 	/* file path doesn't support NOWAIT for non-direct_IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) 	if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) 	    (req->flags & REQ_F_ISREG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) 		goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) 	 * Open-code file_start_write here to grab freeze protection,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) 	 * which will be released by another thread in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) 	 * io_complete_rw().  Fool lockdep by telling it the lock got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) 	 * released so that it doesn't complain about the held lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) 	 * we return to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) 	if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) 		sb_start_write(file_inode(req->file)->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) 		__sb_writers_release(file_inode(req->file)->i_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) 					SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) 	kiocb->ki_flags |= IOCB_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) 	if (req->file->f_op->write_iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) 		ret2 = call_write_iter(req->file, kiocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) 	else if (req->file->f_op->write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) 		ret2 = loop_rw_iter(WRITE, req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) 		ret2 = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) 	 * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) 	 * retry them without IOCB_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) 	if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) 		ret2 = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) 	/* no retry on NONBLOCK marked file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) 	if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) 	if (!force_nonblock || ret2 != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) 		/* IOPOLL retry should happen for io-wq threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) 		if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) 			goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) 		kiocb_done(kiocb, ret2, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) copy_iov:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) 		/* some cases will consume bytes even on error returns */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) 		iov_iter_revert(iter, io_size - iov_iter_count(iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) 		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) 	/* it's reportedly faster than delegating the null check to kfree() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) 	if (iovec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) 		kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) static int __io_splice_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) 			    const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) 	struct io_splice* sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) 	unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) 	sp->file_in = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) 	sp->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) 	sp->flags = READ_ONCE(sqe->splice_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) 	if (unlikely(sp->flags & ~valid_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) 	sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) 				  (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) 	if (!sp->file_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) 	req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) 	if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) 		 * Splice operation will be punted aync, and here need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) 		 * modify io_wq_work.flags, so initialize io_wq_work firstly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) 		io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) 		req->work.flags |= IO_WQ_WORK_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) static int io_tee_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) 		       const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) 	if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) 	return __io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) static int io_tee(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) 	struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) 	struct file *in = sp->file_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) 	struct file *out = sp->file_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) 	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) 	if (sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) 		ret = do_tee(in, out, sp->len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) 	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) 	req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) 	if (ret != sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) 	struct io_splice* sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) 	sp->off_in = READ_ONCE(sqe->splice_off_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) 	sp->off_out = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) 	return __io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) static int io_splice(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) 	struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) 	struct file *in = sp->file_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) 	struct file *out = sp->file_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) 	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) 	loff_t *poff_in, *poff_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) 	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) 	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) 	if (sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) 		ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) 	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) 	req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) 	if (ret != sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742)  * IORING_OP_NOP just posts a completion event, nothing else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) static int io_nop(struct io_kiocb *req, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) 	__io_req_complete(req, 0, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) 	if (!req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) 		     sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) 	req->sync.flags = READ_ONCE(sqe->fsync_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) 	if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) 	req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) 	req->sync.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) static int io_fsync(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) 	loff_t end = req->sync.off + req->sync.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) 	/* fsync always requires a blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) 	ret = vfs_fsync_range(req->file, req->sync.off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) 				end > 0 ? end : LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) 				req->sync.flags & IORING_FSYNC_DATASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) static int io_fallocate_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) 			     const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) 	if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) 	req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) 	req->sync.len = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) 	req->sync.mode = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) 	/* fallocate always requiring blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) 	ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) 				req->sync.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) 	const char __user *fname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) 	if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) 	if (unlikely(req->flags & REQ_F_FIXED_FILE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) 	/* open.how should be already initialised */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) 	if (!(req->open.how.flags & O_PATH) && force_o_largefile())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) 		req->open.how.flags |= O_LARGEFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) 	req->open.dfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) 	fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) 	req->open.filename = getname(fname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) 	if (IS_ERR(req->open.filename)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) 		ret = PTR_ERR(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) 		req->open.filename = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) 	req->open.nofile = rlimit(RLIMIT_NOFILE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) 	req->open.ignore_nonblock = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) 	req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) 	u64 flags, mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) 	mode = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) 	flags = READ_ONCE(sqe->open_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) 	req->open.how = build_open_how(flags, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) 	return __io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) 	struct open_how __user *how;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) 	size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) 	how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) 	len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) 	if (len < OPEN_HOW_SIZE_VER0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) 	ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) 					len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) 	return __io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) static int io_openat2(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) 	struct open_flags op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) 	if (force_nonblock && !req->open.ignore_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) 	ret = build_open_flags(&req->open.how, &op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) 	ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) 	file = do_filp_open(req->open.dfd, req->open.filename, &op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) 	if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) 		put_unused_fd(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) 		ret = PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) 		 * A work-around to ensure that /proc/self works that way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) 		 * that it should - if we get -EOPNOTSUPP back, then assume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) 		 * that proc_self_get_link() failed us because we're in async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) 		 * context. We should be safe to retry this from the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) 		 * itself with force_nonblock == false set, as it should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) 		 * block on lookup. Would be nice to know this upfront and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) 		 * avoid the async dance, but doesn't seem feasible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) 		if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) 			req->open.ignore_nonblock = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) 			refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) 			io_req_task_queue(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) 		fsnotify_open(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) 		fd_install(ret, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) 	putname(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) 	req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) static int io_openat(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) 	return io_openat2(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) static int io_remove_buffers_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) 				  const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) 	struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) 	u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) 	if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) 	tmp = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) 	if (!tmp || tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) 	memset(p, 0, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) 	p->nbufs = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) 	p->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) 			       int bgid, unsigned nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) 	unsigned i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) 	/* shouldn't happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) 	if (!nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) 	/* the head kbuf is the list itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) 	while (!list_empty(&buf->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) 		struct io_buffer *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) 		nxt = list_first_entry(&buf->list, struct io_buffer, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) 		list_del(&nxt->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) 		kfree(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) 		if (++i == nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) 			return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) 	i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) 	kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) 	xa_erase(&ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) 	return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) 			     struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) 	struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) 	struct io_buffer *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) 	io_ring_submit_lock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) 	lockdep_assert_held(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) 	ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) 	head = xa_load(&ctx->io_buffers, p->bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) 	if (head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) 		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) 	/* need to hold the lock to complete IOPOLL requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) 	if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) 		__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) 		io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) 		io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) 		__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) static int io_provide_buffers_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) 				   const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) 	unsigned long size, tmp_check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) 	struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) 	u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) 	if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) 	tmp = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) 	if (!tmp || tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) 		return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) 	p->nbufs = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) 	p->addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) 	p->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) 	if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) 				&size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) 	if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) 	size = (unsigned long)p->len * p->nbufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) 	if (!access_ok(u64_to_user_ptr(p->addr), size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) 	p->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) 	tmp = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) 	if (tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) 		return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) 	p->bid = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) 	struct io_buffer *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) 	u64 addr = pbuf->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) 	int i, bid = pbuf->bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) 	for (i = 0; i < pbuf->nbufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) 		buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) 		if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) 		buf->addr = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) 		buf->bid = bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) 		addr += pbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) 		bid++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) 		if (!*head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) 			INIT_LIST_HEAD(&buf->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) 			*head = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) 			list_add_tail(&buf->list, &(*head)->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) 	return i ? i : -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) 			      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) 	struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) 	struct io_buffer *head, *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) 	io_ring_submit_lock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) 	lockdep_assert_held(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) 	list = head = xa_load(&ctx->io_buffers, p->bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) 	ret = io_add_buffers(p, &head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) 	if (ret >= 0 && !list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) 		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) 			__io_remove_buffers(ctx, head, p->bgid, -1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) 	/* need to hold the lock to complete IOPOLL requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) 	if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) 		__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) 		io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) 		io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) 		__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) static int io_epoll_ctl_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) 			     const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) #if defined(CONFIG_EPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) 	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) 	req->epoll.epfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) 	req->epoll.op = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) 	req->epoll.fd = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) 	if (ep_op_has_event(req->epoll.op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) 		struct epoll_event __user *ev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) 		ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) 		if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) 			struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) #if defined(CONFIG_EPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) 	struct io_epoll *ie = &req->epoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) 	ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) 	if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) 	if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) 	req->madvise.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) 	req->madvise.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) 	req->madvise.advice = READ_ONCE(sqe->fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) static int io_madvise(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) 	struct io_madvise *ma = &req->madvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) 	ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) 	if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) 	req->fadvise.offset = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) 	req->fadvise.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) 	req->fadvise.advice = READ_ONCE(sqe->fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) 	struct io_fadvise *fa = &req->fadvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) 	if (force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) 		switch (fa->advice) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) 		case POSIX_FADV_NORMAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) 		case POSIX_FADV_RANDOM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) 		case POSIX_FADV_SEQUENTIAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) 	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) 	if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) 	if (req->flags & REQ_F_FIXED_FILE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) 	req->statx.dfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) 	req->statx.mask = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) 	req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) 	req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) 	req->statx.flags = READ_ONCE(sqe->statx_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) static int io_statx(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) 	struct io_statx *ctx = &req->statx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) 	if (force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) 		/* only need file table for an actual valid fd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) 		if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) 			req->flags |= REQ_F_NO_FILE_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) 	ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) 		       ctx->buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) 	 * If we queue this for async, it must not be cancellable. That would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) 	 * leave the 'file' in an undeterminate state, and here need to modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) 	 * io_wq_work.flags, so initialize io_wq_work firstly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) 	io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) 	if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) 	    sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) 	if (req->flags & REQ_F_FIXED_FILE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) 	req->close.fd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) 	if ((req->file && req->file->f_op == &io_uring_fops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) 	req->close.put_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) static int io_close(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) 		    struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) 	struct io_close *close = &req->close;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) 	/* might be already done during nonblock submission */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) 	if (!close->put_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) 		ret = __close_fd_get_file(close->fd, &close->put_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) 			return (ret == -ENOENT) ? -EBADF : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) 	/* if the file has a flush method, be safe and punt to async */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) 	if (close->put_file->f_op->flush && force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) 		/* not safe to cancel at this point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) 		req->work.flags |= IO_WQ_WORK_NO_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) 		/* was never set, but play safe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) 		req->flags &= ~REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) 		/* avoid grabbing files - we don't need the files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) 		req->flags |= REQ_F_NO_FILE_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) 	/* No ->flush() or already async, safely close from here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) 	ret = filp_close(close->put_file, req->work.identity->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) 	fput(close->put_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) 	close->put_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) 	if (!req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) 		     sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) 	req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) 	req->sync.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) 	req->sync.flags = READ_ONCE(sqe->sync_range_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) 	/* sync_file_range always requires a blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) 	ret = sync_file_range(req->file, req->sync.off, req->sync.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) 				req->sync.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) #if defined(CONFIG_NET)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) static int io_setup_async_msg(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) 			      struct io_async_msghdr *kmsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) 	struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) 	if (async_msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) 	if (io_alloc_async_data(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) 		if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) 			kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) 	async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) 	req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) 	memcpy(async_msg, kmsg, sizeof(*kmsg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) 	return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) static int io_sendmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) 			       struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) 	iomsg->iov = iomsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) 	iomsg->msg.msg_name = &iomsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) 	return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) 				   req->sr_msg.msg_flags, &iomsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) 	struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) 	sr->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) 	if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) 		sr->msg_flags |= MSG_CMSG_COMPAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) 	if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) 	ret = io_sendmsg_copy_hdr(req, async_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) 		req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) 	struct io_async_msghdr iomsg, *kmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) 	struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) 	unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) 	int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) 	sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) 	if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) 	if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) 		kmsg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) 		kmsg->msg.msg_name = &kmsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) 		/* if iov is set, it's allocated already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) 		if (!kmsg->iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) 			kmsg->iov = kmsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) 		kmsg->msg.msg_iter.iov = kmsg->iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) 		ret = io_sendmsg_copy_hdr(req, &iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) 		kmsg = &iomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) 	flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) 	if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) 	else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) 		flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) 	if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) 	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) 	if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) 		return io_setup_async_msg(req, kmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) 	if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) 	if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) 		kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) 	req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) 	if (ret < min_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) static int io_send(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) 		   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) 	struct msghdr msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) 	struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) 	struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) 	unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) 	int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) 	sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) 	if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) 	ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) 	msg.msg_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) 	msg.msg_control = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) 	msg.msg_controllen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) 	msg.msg_namelen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) 	flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) 	if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) 	else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) 		flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) 	if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) 		min_ret = iov_iter_count(&msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) 	msg.msg_flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) 	ret = sock_sendmsg(sock, &msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) 	if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) 	if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) 	if (ret < min_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) 				 struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) 	struct iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) 	size_t iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) 	ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) 					&iomsg->uaddr, &uiov, &iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) 	if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) 		if (iov_len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) 		if (copy_from_user(iomsg->iov, uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) 		sr->len = iomsg->iov[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) 		iov_iter_init(&iomsg->msg.msg_iter, READ, iomsg->iov, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) 				sr->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) 		iomsg->iov = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) 		ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) 				     &iomsg->iov, &iomsg->msg.msg_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) 				     false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) 		if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) 					struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) 	struct compat_msghdr __user *msg_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) 	struct compat_iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) 	compat_uptr_t ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) 	compat_size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) 	msg_compat = (struct compat_msghdr __user *) sr->umsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) 	ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) 					&ptr, &len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) 	uiov = compat_ptr(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) 	if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) 		compat_ssize_t clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) 		if (len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) 		if (!access_ok(uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) 		if (__get_user(clen, &uiov->iov_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) 		if (clen < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) 		sr->len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) 		iomsg->iov[0].iov_len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) 		iomsg->iov = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) 		ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) 				   UIO_FASTIOV, &iomsg->iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) 				   &iomsg->msg.msg_iter, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) static int io_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) 			       struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) 	iomsg->msg.msg_name = &iomsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) 	iomsg->iov = iomsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) 	if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) 		return __io_compat_recvmsg_copy_hdr(req, iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) 	return __io_recvmsg_copy_hdr(req, iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) 					       bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) 	struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) 	kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) 	if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) 		return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) 	sr->kbuf = kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) 	req->flags |= REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) 	return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) 	return io_put_kbuf(req, req->sr_msg.kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) static int io_recvmsg_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) 			   const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) 	struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) 	sr->msg_flags = READ_ONCE(sqe->msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) 	sr->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) 	sr->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) 	if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) 		sr->msg_flags |= MSG_CMSG_COMPAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) 	if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) 	ret = io_recvmsg_copy_hdr(req, async_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) 		req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) 	struct io_async_msghdr iomsg, *kmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) 	struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) 	struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) 	unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) 	int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) 	int ret, cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) 	sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) 	if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) 	if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) 		kmsg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) 		kmsg->msg.msg_name = &kmsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) 		/* if iov is set, it's allocated already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) 		if (!kmsg->iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) 			kmsg->iov = kmsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) 		kmsg->msg.msg_iter.iov = kmsg->iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) 		ret = io_recvmsg_copy_hdr(req, &iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) 		kmsg = &iomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) 	if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) 		kbuf = io_recv_buffer_select(req, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) 		if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) 			return PTR_ERR(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) 		kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) 		iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) 				1, req->sr_msg.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) 	flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) 	if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) 	else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) 		flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) 	if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) 	ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) 					kmsg->uaddr, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) 	if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) 		return io_setup_async_msg(req, kmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) 	if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) 	if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) 		cflags = io_put_recv_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) 	if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) 		kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) 	req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) 	if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) 	__io_req_complete(req, ret, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) static int io_recv(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) 		   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) 	struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) 	struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) 	struct msghdr msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) 	void __user *buf = sr->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) 	struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) 	struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) 	unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) 	int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) 	int ret, cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) 	sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) 	if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) 	if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) 		kbuf = io_recv_buffer_select(req, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) 		if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) 			return PTR_ERR(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) 		buf = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) 	ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) 	if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) 	msg.msg_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) 	msg.msg_control = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) 	msg.msg_controllen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) 	msg.msg_namelen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) 	msg.msg_iocb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) 	msg.msg_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) 	flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) 	if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) 	else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) 		flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) 	if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) 		min_ret = iov_iter_count(&msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) 	ret = sock_recvmsg(sock, &msg, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) 	if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) 	if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) 	if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) 		cflags = io_put_recv_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) 	if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) 	__io_req_complete(req, ret, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) 	struct io_accept *accept = &req->accept;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) 	if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) 	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) 	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) 	accept->flags = READ_ONCE(sqe->accept_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) 	accept->nofile = rlimit(RLIMIT_NOFILE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) static int io_accept(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) 		     struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) 	struct io_accept *accept = &req->accept;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) 	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) 	if (req->file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) 		req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) 	ret = __sys_accept4_file(req->file, file_flags, accept->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) 					accept->addr_len, accept->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) 					accept->nofile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) 	if (ret == -EAGAIN && force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) 		if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) 			ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) 	struct io_connect *conn = &req->connect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) 	struct io_async_connect *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) 	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) 	if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) 	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) 	conn->addr_len =  READ_ONCE(sqe->addr2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) 	if (!io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) 	return move_addr_to_kernel(conn->addr, conn->addr_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) 					&io->address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) static int io_connect(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) 	struct io_async_connect __io, *io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) 	unsigned file_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) 	if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) 		io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) 		ret = move_addr_to_kernel(req->connect.addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) 						req->connect.addr_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) 						&__io.address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) 		io = &__io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) 	file_flags = force_nonblock ? O_NONBLOCK : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) 	ret = __sys_connect_file(req->file, &io->address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) 					req->connect.addr_len, file_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) 	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) 		if (req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) 			return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) 		if (io_alloc_async_data(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) 			ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) 		io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) 		memcpy(req->async_data, &__io, sizeof(__io));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) 	if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) #else /* !CONFIG_NET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) static int io_send(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) 		   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) static int io_recvmsg_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) 			   const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) static int io_recv(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) 		   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) static int io_accept(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) 		     struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) static int io_connect(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) 		      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) 	return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) #endif /* CONFIG_NET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) struct io_poll_table {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) 	struct poll_table_struct pt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) 	int nr_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) 	int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) 			   __poll_t mask, task_work_func_t func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) 	bool twa_signal_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) 	/* for instances that support it check for an event match first: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) 	if (mask && !(mask & poll->events))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) 	trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) 	list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) 	req->result = mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) 	init_task_work(&req->task_work, func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) 	percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) 	 * If we using the signalfd wait_queue_head for this wakeup, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) 	 * it's not safe to use TWA_SIGNAL as we could be recursing on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) 	 * tsk->sighand->siglock on doing the wakeup. Should not be needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) 	 * either, as the normal wakeup will suffice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) 	twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) 	 * If this fails, then the task is exiting. When a task exits, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) 	 * work gets canceled, so just cancel this request as well instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) 	 * of executing it. We can't safely execute it anyway, as we may not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) 	 * have the needed state needed for it anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) 	ret = io_req_task_work_add(req, twa_signal_ok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) 	if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) 		struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) 		WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) 		tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) 		task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) 		wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) 	__acquires(&req->ctx->completion_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) 	if (!req->result && !READ_ONCE(poll->canceled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) 		struct poll_table_struct pt = { ._key = poll->events };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) 		req->result = vfs_poll(req->file, &pt) & poll->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) 	if (!req->result && !READ_ONCE(poll->canceled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) 		add_wait_queue(poll->head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) 	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) 	if (req->opcode == IORING_OP_POLL_ADD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) 		return req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) 	return req->apoll->double_poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) 	if (req->opcode == IORING_OP_POLL_ADD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) 		return &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) 	return &req->apoll->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) static void io_poll_remove_double(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) 	struct io_poll_iocb *poll = io_poll_get_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) 	lockdep_assert_held(&req->ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) 	if (poll && poll->head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) 		struct wait_queue_head *head = poll->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) 		spin_lock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) 		list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) 		if (poll->wait.private)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) 			refcount_dec(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) 		poll->head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) 		spin_unlock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) 	io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) 	req->poll.done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) 	io_cqring_fill_event(req, error ? error : mangle_poll(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) static void io_poll_task_func(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) 	struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) 	if (io_poll_rewait(req, &req->poll)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) 		spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) 		hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) 		io_poll_complete(req, req->result, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) 		spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) 		nxt = io_put_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) 		io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) 		if (nxt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) 			__io_req_task_submit(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) 			       int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) 	struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) 	struct io_poll_iocb *poll = io_poll_get_single(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) 	__poll_t mask = key_to_poll(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) 	/* for instances that support it check for an event match first: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) 	if (mask && !(mask & poll->events))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) 	list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) 	if (poll && poll->head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) 		bool done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) 		spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) 		done = list_empty(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) 		if (!done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) 			list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) 		/* make sure double remove sees this as being gone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) 		wait->private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) 		spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) 		if (!done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) 			/* use wait func handler, so it matches the rq type */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) 			poll->wait.func(&poll->wait, mode, sync, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) 	refcount_dec(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) 			      wait_queue_func_t wake_func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) 	poll->head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) 	poll->done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) 	poll->canceled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) 	poll->events = events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) 	INIT_LIST_HEAD(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) 	init_waitqueue_func_entry(&poll->wait, wake_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) 			    struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) 			    struct io_poll_iocb **poll_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) 	struct io_kiocb *req = pt->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) 	 * The file being polled uses multiple waitqueues for poll handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) 	 * (e.g. one for read, one for write). Setup a separate io_poll_iocb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) 	 * if this happens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) 	if (unlikely(pt->nr_entries)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) 		struct io_poll_iocb *poll_one = poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) 		/* already have a 2nd entry, fail a third attempt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) 		if (*poll_ptr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) 			pt->error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) 		/* double add on the same waitqueue head, ignore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) 		if (poll->head == head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) 		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) 		if (!poll) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) 			pt->error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) 		io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) 		refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) 		poll->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) 		*poll_ptr = poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) 	pt->nr_entries++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) 	poll->head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) 	if (poll->events & EPOLLEXCLUSIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) 		add_wait_queue_exclusive(head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) 		add_wait_queue(head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) 			       struct poll_table_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) 	struct async_poll *apoll = pt->req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) 	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) static void io_async_task_func(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) 	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) 	struct async_poll *apoll = req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) 	trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) 	if (io_poll_rewait(req, &apoll->poll)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) 		spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) 		percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) 	/* If req is still hashed, it cannot have been canceled. Don't check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) 	if (hash_hashed(&req->hash_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) 		hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) 	io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) 	if (!READ_ONCE(apoll->poll.canceled))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) 		__io_req_task_submit(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) 		__io_req_task_cancel(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) 	kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) 	kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) 			void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) 	struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) 	struct io_poll_iocb *poll = &req->apoll->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) 	trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) 					key_to_poll(key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) 	return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) static void io_poll_req_insert(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) 	struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) 	list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) 	hlist_add_head(&req->hash_node, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) 				      struct io_poll_iocb *poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) 				      struct io_poll_table *ipt, __poll_t mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) 				      wait_queue_func_t wake_func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) 	__acquires(&ctx->completion_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) 	bool cancel = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) 	INIT_HLIST_NODE(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) 	io_init_poll_iocb(poll, mask, wake_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) 	poll->file = req->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) 	poll->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) 	ipt->pt._key = mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) 	ipt->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) 	ipt->error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) 	ipt->nr_entries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) 	mask = vfs_poll(req->file, &ipt->pt) & poll->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) 	if (unlikely(!ipt->nr_entries) && !ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) 		ipt->error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) 	if (ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) 		io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) 	if (likely(poll->head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) 		spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) 		if (unlikely(list_empty(&poll->wait.entry))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) 			if (ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) 				cancel = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) 			ipt->error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) 			mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) 		if (mask || ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) 			list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) 		else if (cancel)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) 			WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) 		else if (!poll->done) /* actually waiting for an event */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) 			io_poll_req_insert(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) 		spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) 	return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) static bool io_arm_poll_handler(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) 	const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) 	struct async_poll *apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) 	struct io_poll_table ipt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) 	__poll_t mask, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) 	int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) 	if (!req->file || !file_can_poll(req->file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) 	if (req->flags & REQ_F_POLLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) 	if (def->pollin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) 		rw = READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) 	else if (def->pollout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) 		rw = WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) 	/* if we can't nonblock try, then no point in arming a poll handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) 	if (!io_file_supports_async(req->file, rw))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) 	apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) 	if (unlikely(!apoll))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) 	apoll->double_poll = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) 	req->flags |= REQ_F_POLLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) 	req->apoll = apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) 	mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) 	if (def->pollin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) 		mask |= POLLIN | POLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) 	if (def->pollout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) 		mask |= POLLOUT | POLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) 	/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) 	if ((req->opcode == IORING_OP_RECVMSG) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) 	    (req->sr_msg.msg_flags & MSG_ERRQUEUE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) 		mask &= ~POLLIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) 	mask |= POLLERR | POLLPRI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) 	ipt.pt._qproc = io_async_queue_proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) 	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) 					io_async_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) 	if (ret || ipt.error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) 		io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) 		spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) 		kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) 		kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) 	trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) 					apoll->poll.events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) static bool __io_poll_remove_one(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) 				 struct io_poll_iocb *poll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) 	bool do_complete = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) 	spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) 	WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) 	if (!list_empty(&poll->wait.entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) 		list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) 		do_complete = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) 	spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) 	hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) 	return do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) static bool io_poll_remove_one(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) 	bool do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) 	io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) 	if (req->opcode == IORING_OP_POLL_ADD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) 		do_complete = __io_poll_remove_one(req, &req->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) 		struct async_poll *apoll = req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) 		/* non-poll requests have submit ref still */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) 		do_complete = __io_poll_remove_one(req, &apoll->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) 		if (do_complete) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) 			io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) 			kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) 			kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) 	if (do_complete) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) 		io_cqring_fill_event(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) 		io_commit_cqring(req->ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) 		io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) 	return do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374)  * Returns true if we found and killed one or more poll requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) 			       struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) 	struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) 	int posted = 0, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) 	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) 		struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) 		list = &ctx->cancel_hash[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) 		hlist_for_each_entry_safe(req, tmp, list, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) 			if (io_match_task(req, tsk, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) 				posted += io_poll_remove_one(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) 	if (posted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) 		io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) 	return posted != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) 	struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) 	list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) 	hlist_for_each_entry(req, list, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) 		if (sqe_addr != req->user_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) 		if (io_poll_remove_one(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) 		return -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) 	return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) static int io_poll_remove_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) 			       const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) 	if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) 	    sqe->poll_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) 	req->poll.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432)  * Find a running poll command that matches one specified in sqe->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433)  * and remove it if found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) static int io_poll_remove(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) 	u64 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) 	addr = req->poll.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) 	ret = io_poll_cancel(ctx, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) 	io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) 			void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) 	struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) 	struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) 	return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) 			       struct poll_table_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) 	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) 	__io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) 	struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) 	u32 events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) 	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) 	events = READ_ONCE(sqe->poll32_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) #ifdef __BIG_ENDIAN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) 	events = swahw32(events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) 		       (events & EPOLLEXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) static int io_poll_add(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) 	struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) 	struct io_poll_table ipt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) 	__poll_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) 	ipt.pt._qproc = io_poll_queue_proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) 	mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) 					io_poll_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) 	if (mask) { /* no async, we'd stolen it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) 		ipt.error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) 		io_poll_complete(req, mask, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) 	if (mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) 		io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) 		io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) 	return ipt.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) 	struct io_timeout_data *data = container_of(timer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) 						struct io_timeout_data, timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) 	struct io_kiocb *req = data->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) 	list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) 	atomic_set(&req->ctx->cq_timeouts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) 		atomic_read(&req->ctx->cq_timeouts) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) 	io_cqring_fill_event(req, -ETIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) 	req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) 	io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) 	return HRTIMER_NORESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) static int __io_timeout_cancel(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) 	struct io_timeout_data *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) 	ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) 	if (ret == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) 		return -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) 	list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) 	req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) 	io_cqring_fill_event(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) 	io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) 	int ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) 	list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) 		if (user_data == req->user_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) 	if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) 	return __io_timeout_cancel(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) static int io_timeout_remove_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) 				  const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) 	if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) 	req->timeout_rem.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586)  * Remove or update an existing timeout command
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) static int io_timeout_remove(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) 	ret = io_timeout_cancel(ctx, req->timeout_rem.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) 	io_cqring_fill_event(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) 	io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) 			   bool is_timeout_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) 	struct io_timeout_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) 	unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) 	u32 off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) 	if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) 	if (off && is_timeout_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) 	flags = READ_ONCE(sqe->timeout_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) 	if (flags & ~IORING_TIMEOUT_ABS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) 	req->timeout.off = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) 	if (!req->async_data && io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) 	data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) 	data->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) 	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) 	if (flags & IORING_TIMEOUT_ABS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) 		data->mode = HRTIMER_MODE_ABS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) 		data->mode = HRTIMER_MODE_REL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) 	INIT_LIST_HEAD(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) 	hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) static int io_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) 	struct io_timeout_data *data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) 	struct list_head *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) 	u32 tail, off = req->timeout.off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) 	 * sqe->off holds how many events that need to occur for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) 	 * timeout event to be satisfied. If it isn't set, then this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) 	 * a pure timeout request, sequence isn't used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) 	if (io_is_timeout_noseq(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) 		entry = ctx->timeout_list.prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) 		goto add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) 	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) 	req->timeout.target_seq = tail + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) 	/* Update the last seq here in case io_flush_timeouts() hasn't.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) 	 * This is safe because ->completion_lock is held, and submissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) 	 * and completions are never mixed in the same ->completion_lock section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) 	ctx->cq_last_tm_flush = tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) 	 * Insertion sort, ensuring the first entry in the list is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) 	 * the one we need first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) 	list_for_each_prev(entry, &ctx->timeout_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) 		struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) 						  timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) 		if (io_is_timeout_noseq(nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) 		/* nxt.seq is behind @tail, otherwise would've been completed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) 		if (off >= nxt->timeout.target_seq - tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) add:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) 	list_add(&req->timeout.list, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) 	data->timer.function = io_timeout_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) 	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) static bool io_cancel_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) 	return req->user_data == (unsigned long) data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) 	enum io_wq_cancel cancel_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) 	cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) 	switch (cancel_ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) 	case IO_WQ_CANCEL_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) 	case IO_WQ_CANCEL_RUNNING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) 		ret = -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) 	case IO_WQ_CANCEL_NOTFOUND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) 				     struct io_kiocb *req, __u64 sqe_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) 				     int success_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) 	ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) 	if (ret != -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) 		spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) 	ret = io_timeout_cancel(ctx, sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) 	if (ret != -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) 	ret = io_poll_cancel(ctx, sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) 		ret = success_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) 	io_cqring_fill_event(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) 	io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) 	io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) 	io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) static int io_async_cancel_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) 				const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) 	if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) 	    sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) 	req->cancel.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) static int io_async_cancel(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) 	io_async_find_and_cancel(ctx, req, req->cancel.addr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) static int io_files_update_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) 				const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) 	if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) 	if (sqe->ioprio || sqe->rw_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) 	req->files_update.offset = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) 	req->files_update.nr_args = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) 	if (!req->files_update.nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) 	req->files_update.arg = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) static int io_files_update(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) 			   struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) 	struct io_uring_files_update up;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) 	if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) 	up.offset = req->files_update.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) 	up.fds = req->files_update.arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) 	ret = __io_sqe_files_update(ctx, &up, req->files_update.nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) 	__io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) 	switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) 	case IORING_OP_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) 	case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) 	case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) 	case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) 		return io_read_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) 	case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) 	case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) 	case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) 		return io_write_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) 	case IORING_OP_POLL_ADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) 		return io_poll_add_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) 	case IORING_OP_POLL_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) 		return io_poll_remove_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) 	case IORING_OP_FSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) 		return io_prep_fsync(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) 	case IORING_OP_SYNC_FILE_RANGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) 		return io_prep_sfr(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) 	case IORING_OP_SENDMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) 	case IORING_OP_SEND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) 		return io_sendmsg_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) 	case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) 	case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) 		return io_recvmsg_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) 	case IORING_OP_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) 		return io_connect_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) 	case IORING_OP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) 		return io_timeout_prep(req, sqe, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) 	case IORING_OP_TIMEOUT_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) 		return io_timeout_remove_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) 	case IORING_OP_ASYNC_CANCEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) 		return io_async_cancel_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) 	case IORING_OP_LINK_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) 		return io_timeout_prep(req, sqe, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) 	case IORING_OP_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) 		return io_accept_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) 	case IORING_OP_FALLOCATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) 		return io_fallocate_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) 	case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) 		return io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) 	case IORING_OP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) 		return io_close_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) 	case IORING_OP_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) 		return io_files_update_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) 	case IORING_OP_STATX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) 		return io_statx_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) 	case IORING_OP_FADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) 		return io_fadvise_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) 	case IORING_OP_MADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) 		return io_madvise_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) 	case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) 		return io_openat2_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) 	case IORING_OP_EPOLL_CTL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) 		return io_epoll_ctl_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) 	case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) 		return io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) 	case IORING_OP_PROVIDE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) 		return io_provide_buffers_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) 	case IORING_OP_REMOVE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) 		return io_remove_buffers_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) 	case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) 		return io_tee_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) 			req->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) 	return-EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) static int io_req_defer_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) 			     const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) 	if (!sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) 	if (io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) 	return io_req_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) static u32 io_get_sequence(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) 	struct io_kiocb *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) 	u32 total_submitted, nr_reqs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) 	if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) 		list_for_each_entry(pos, &req->link_list, link_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) 			nr_reqs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) 	total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) 	return total_submitted - nr_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) 	struct io_defer_entry *de;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) 	u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) 	/* Still need defer if there is pending req in defer list. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) 	if (likely(list_empty_careful(&ctx->defer_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) 		!(req->flags & REQ_F_IO_DRAIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) 	seq = io_get_sequence(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) 	/* Still a chance to pass the sequence check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) 	if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) 	if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) 		ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) 	io_prep_async_link(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) 	de = kmalloc(sizeof(*de), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) 	if (!de)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) 	if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) 		spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) 		kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) 		io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) 		return -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) 	trace_io_uring_defer(ctx, req, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) 	de->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) 	de->seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) 	list_add_tail(&de->list, &ctx->defer_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) 	return -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) static void io_req_drop_files(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) 	struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) 	if (req->work.flags & IO_WQ_WORK_FILES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) 		put_files_struct(req->work.identity->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) 		put_nsproxy(req->work.identity->nsproxy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) 	spin_lock_irqsave(&ctx->inflight_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) 	list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) 	spin_unlock_irqrestore(&ctx->inflight_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) 	req->flags &= ~REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) 	req->work.flags &= ~IO_WQ_WORK_FILES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) 	if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) 		wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) static void __io_clean_op(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) 	if (req->flags & REQ_F_BUFFER_SELECTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) 		switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) 		case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) 		case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) 		case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) 			kfree((void *)(unsigned long)req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) 		case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) 		case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) 			kfree(req->sr_msg.kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) 		req->flags &= ~REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) 	if (req->flags & REQ_F_NEED_CLEANUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) 		switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) 		case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) 		case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) 		case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) 		case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) 		case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) 		case IORING_OP_WRITE: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) 			struct io_async_rw *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) 			if (io->free_iovec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) 				kfree(io->free_iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) 		case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) 		case IORING_OP_SENDMSG: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) 			struct io_async_msghdr *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) 			if (io->iov != io->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) 				kfree(io->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) 		case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) 		case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) 			io_put_file(req, req->splice.file_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) 				    (req->splice.flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) 		case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) 		case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) 			if (req->open.filename)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) 				putname(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) 		req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) 			struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) 	switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) 	case IORING_OP_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) 		ret = io_nop(req, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) 	case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) 	case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) 	case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) 		ret = io_read(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) 	case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) 	case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) 	case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) 		ret = io_write(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) 	case IORING_OP_FSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) 		ret = io_fsync(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) 	case IORING_OP_POLL_ADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) 		ret = io_poll_add(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) 	case IORING_OP_POLL_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) 		ret = io_poll_remove(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) 	case IORING_OP_SYNC_FILE_RANGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) 		ret = io_sync_file_range(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) 	case IORING_OP_SENDMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) 		ret = io_sendmsg(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) 	case IORING_OP_SEND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) 		ret = io_send(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) 	case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) 		ret = io_recvmsg(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) 	case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) 		ret = io_recv(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) 	case IORING_OP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) 		ret = io_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) 	case IORING_OP_TIMEOUT_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) 		ret = io_timeout_remove(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) 	case IORING_OP_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) 		ret = io_accept(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) 	case IORING_OP_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) 		ret = io_connect(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) 	case IORING_OP_ASYNC_CANCEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) 		ret = io_async_cancel(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) 	case IORING_OP_FALLOCATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) 		ret = io_fallocate(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) 	case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) 		ret = io_openat(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) 	case IORING_OP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) 		ret = io_close(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) 	case IORING_OP_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) 		ret = io_files_update(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) 	case IORING_OP_STATX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) 		ret = io_statx(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) 	case IORING_OP_FADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) 		ret = io_fadvise(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) 	case IORING_OP_MADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) 		ret = io_madvise(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) 	case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) 		ret = io_openat2(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) 	case IORING_OP_EPOLL_CTL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) 		ret = io_epoll_ctl(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114) 	case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) 		ret = io_splice(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) 	case IORING_OP_PROVIDE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) 		ret = io_provide_buffers(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) 	case IORING_OP_REMOVE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) 		ret = io_remove_buffers(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) 	case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) 		ret = io_tee(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) 	/* If the op doesn't have a file, we're not polling for it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) 	if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) 		const bool in_async = io_wq_current_is_worker();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) 		/* workqueue context doesn't hold uring_lock, grab it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) 		if (in_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) 			mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) 		io_iopoll_req_issued(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) 		if (in_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) 			mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) 	struct io_kiocb *timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) 	timeout = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) 	if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) 		io_queue_linked_timeout(timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) 	/* if NO_CANCEL is set, we must still run the work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) 	if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) 				IO_WQ_WORK_CANCEL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) 		ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) 			ret = io_issue_sqe(req, false, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) 			 * We can get EAGAIN for polled IO even though we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) 			 * forcing a sync submission from here, since we can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) 			 * wait for request slots on the block side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) 			if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) 		} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) 		struct io_ring_ctx *lock_ctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) 		if (req->ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) 			lock_ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) 		 * io_iopoll_complete() does not hold completion_lock to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) 		 * complete polled io, so here for polled io, we can not call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) 		 * io_req_complete() directly, otherwise there maybe concurrent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) 		 * access to cqring, defer_list, etc, which is not safe. Given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) 		 * that io_iopoll_complete() is always called under uring_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) 		 * so here for polled io, we also get uring_lock to complete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) 		 * it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) 		if (lock_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) 			mutex_lock(&lock_ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) 		io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) 		if (lock_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) 			mutex_unlock(&lock_ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) 	return io_steal_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) 					      int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) 	struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) 	table = &ctx->file_data->table[index >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) 	return table->files[index & IORING_FILE_TABLE_MASK];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) static struct file *io_file_get(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) 				struct io_kiocb *req, int fd, bool fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) 	if (fixed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) 		if (unlikely((unsigned int)fd >= ctx->nr_user_files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) 			return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) 		fd = array_index_nospec(fd, ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) 		file = io_file_from_index(ctx, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) 		if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) 			req->fixed_file_refs = &ctx->file_data->node->refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) 			percpu_ref_get(req->fixed_file_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) 		trace_io_uring_file_get(ctx, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) 		file = __io_file_get(state, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) 	if (file && file->f_op == &io_uring_fops &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) 	    !(req->flags & REQ_F_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) 		io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) 		req->flags |= REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) 		spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) 		list_add(&req->inflight_entry, &ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) 		spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) 	return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) 			   int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) 	bool fixed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) 	fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) 	if (unlikely(!fixed && io_async_submit(req->ctx)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) 	req->file = io_file_get(state, req, fd, fixed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) 	if (req->file || io_op_defs[req->opcode].needs_file_no_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) 	return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) 	struct io_timeout_data *data = container_of(timer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) 						struct io_timeout_data, timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) 	struct io_kiocb *req = data->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) 	struct io_kiocb *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) 	spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) 	 * We don't expect the list to be empty, that will only happen if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) 	 * race with the completion of the linked work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) 	if (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) 		prev = list_entry(req->link_list.prev, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) 				  link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) 		list_del_init(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) 		if (!refcount_inc_not_zero(&prev->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) 			prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) 	list_del(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) 	if (prev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) 		io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) 		io_put_req_deferred(prev, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) 		io_cqring_add_event(req, -ETIME, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) 		io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) 	return HRTIMER_NORESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) static void __io_queue_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) 	 * If the list is now empty, then our linked request finished before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) 	 * we got a chance to setup the timer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) 	if (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) 		struct io_timeout_data *data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) 		data->timer.function = io_link_timeout_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) 		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) 				data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) static void io_queue_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) 	__io_queue_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) 	/* drop submission reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) 	io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) 	struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) 	if (!(req->flags & REQ_F_LINK_HEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) 	if (req->flags & REQ_F_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) 	nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) 					link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) 	if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) 	nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) 	req->flags |= REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) 	return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) 	struct io_kiocb *linked_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) 	const struct cred *old_creds = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) 	linked_timeout = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) 	if ((req->flags & REQ_F_WORK_INITIALIZED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) 	    (req->work.flags & IO_WQ_WORK_CREDS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) 	    req->work.identity->creds != current_cred()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) 		if (old_creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) 			revert_creds(old_creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) 		if (old_creds == req->work.identity->creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) 			old_creds = NULL; /* restored original creds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) 			old_creds = override_creds(req->work.identity->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) 	ret = io_issue_sqe(req, true, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) 	 * We async punt it if the file wasn't marked NOWAIT, or if the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) 	 * doesn't support non-blocking read/write attempts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) 	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) 		if (!io_arm_poll_handler(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) 			 * Queued up for async execution, worker will release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) 			 * submit reference when the iocb is actually submitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) 			io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) 		if (linked_timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) 			io_queue_linked_timeout(linked_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) 	} else if (likely(!ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) 		/* drop submission reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) 		req = io_put_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) 		if (linked_timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) 			io_queue_linked_timeout(linked_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) 		if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) 			if (!(req->flags & REQ_F_FORCE_ASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) 				goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) 			io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) 		/* un-prep timeout, so it'll be killed as any other linked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) 		req->flags &= ~REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) 		req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) 		io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) 		io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) 	if (old_creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) 		revert_creds(old_creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) 			 struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) 	ret = io_req_defer(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) 		if (ret != -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) fail_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) 			req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) 			io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) 			io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) 	} else if (req->flags & REQ_F_FORCE_ASYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) 		if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) 			ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) 			if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) 				goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) 		io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) 		if (sqe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) 			ret = io_req_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) 			if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) 				goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) 		__io_queue_sqe(req, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) static inline void io_queue_link_head(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) 				      struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) 	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) 		io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) 		io_req_complete(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) 		io_queue_sqe(req, NULL, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) 			 struct io_kiocb **link, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) 	struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) 	 * If we already have a head request, queue this one for async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) 	 * submittal once the head completes. If we don't have a head but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) 	 * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) 	 * submitted sync once the chain is complete. If none of those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) 	 * conditions are true (normal request), then just queue it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) 	if (*link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) 		struct io_kiocb *head = *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) 		 * Taking sequential execution of a link, draining both sides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) 		 * of the link also fullfils IOSQE_IO_DRAIN semantics for all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) 		 * requests in the link. So, it drains the head and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) 		 * next after the link request. The last one is done via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) 		 * drain_next flag to persist the effect across calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) 		if (req->flags & REQ_F_IO_DRAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) 			head->flags |= REQ_F_IO_DRAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) 			ctx->drain_next = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) 		ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) 		if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) 			/* fail even hard links since we don't submit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) 			head->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) 		trace_io_uring_link(ctx, req, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) 		list_add_tail(&req->link_list, &head->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) 		/* last request of a link, enqueue the link */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) 		if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) 			io_queue_link_head(head, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) 			*link = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) 		if (unlikely(ctx->drain_next)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) 			req->flags |= REQ_F_IO_DRAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) 			ctx->drain_next = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) 		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) 			req->flags |= REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) 			INIT_LIST_HEAD(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) 			ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) 			if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) 				req->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) 			*link = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) 			io_queue_sqe(req, sqe, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511)  * Batched submission is done, ensure local IO is flushed out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) static void io_submit_state_end(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) 	if (!list_empty(&state->comp.list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) 		io_submit_flush_completions(&state->comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) 	blk_finish_plug(&state->plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) 	io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) 	if (state->free_reqs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) 		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524)  * Start submission side cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) static void io_submit_state_start(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) 				  struct io_ring_ctx *ctx, unsigned int max_ios)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) 	blk_start_plug(&state->plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) 	state->comp.nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) 	INIT_LIST_HEAD(&state->comp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) 	state->comp.ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) 	state->free_reqs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) 	state->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) 	state->ios_left = max_ios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) static void io_commit_sqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) 	 * Ensure any loads from the SQEs are done at this point,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) 	 * since once we write the new head, the application could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) 	 * write new data to them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) 	smp_store_release(&rings->sq.head, ctx->cached_sq_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551)  * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552)  * that is mapped by userspace. This means that care needs to be taken to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553)  * ensure that reads are stable, as we cannot rely on userspace always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554)  * being a good citizen. If members of the sqe are validated and then later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555)  * used, it's important that those reads are done through READ_ONCE() to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556)  * prevent a re-load down the line.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560) 	u32 *sq_array = ctx->sq_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) 	unsigned head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564) 	 * The cached sq head (or cq tail) serves two purposes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) 	 * 1) allows us to batch the cost of updating the user visible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) 	 *    head updates.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) 	 * 2) allows the kernel side to track the head on its own, even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) 	 *    though the application is the one updating it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) 	head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) 	if (likely(head < ctx->sq_entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) 		return &ctx->sq_sqes[head];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) 	/* drop invalid entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) 	ctx->cached_sq_dropped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) 	WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) static inline void io_consume_sqe(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) 	ctx->cached_sq_head++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587)  * Check SQE restrictions (opcode and flags).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589)  * Returns 'true' if SQE is allowed, 'false' otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) static inline bool io_check_restriction(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) 					struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) 					unsigned int sqe_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) 	if (!ctx->restricted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) 	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) 	if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) 	    ctx->restrictions.sqe_flags_required)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) 	if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) 			  ctx->restrictions.sqe_flags_required))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) 				IOSQE_BUFFER_SELECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) 		       const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) 		       struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) 	unsigned int sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) 	int id, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) 	req->opcode = READ_ONCE(sqe->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) 	req->user_data = READ_ONCE(sqe->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) 	req->async_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) 	req->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) 	req->ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) 	req->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) 	/* one is dropped after submission, the other at completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) 	refcount_set(&req->refs, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) 	req->task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) 	req->result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) 	if (unlikely(req->opcode >= IORING_OP_LAST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) 	if (unlikely(io_sq_thread_acquire_mm(ctx, req)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) 	sqe_flags = READ_ONCE(sqe->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) 	/* enforce forwards compatibility on users */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) 	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) 	if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) 		return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) 	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) 	    !io_op_defs[req->opcode].buffer_select)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) 	id = READ_ONCE(sqe->personality);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) 	if (id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) 		struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) 		iod = xa_load(&ctx->personalities, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) 		if (unlikely(!iod))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) 		refcount_inc(&iod->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) 		__io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) 		get_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) 		req->work.identity = iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) 		req->work.flags |= IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) 	/* same numerical values with corresponding REQ_F_*, safe to copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) 	req->flags |= sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) 	if (!io_op_defs[req->opcode].needs_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) 	ret = io_req_set_file(state, req, READ_ONCE(sqe->fd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) 	state->ios_left--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) 	struct io_submit_state state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) 	struct io_kiocb *link = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) 	int i, submitted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) 	/* if we have a backlog and couldn't flush it all, return BUSY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) 	if (test_bit(0, &ctx->sq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) 		if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) 			return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) 	/* make sure SQ entry isn't read before tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) 	nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) 		return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) 	percpu_counter_add(&current->io_uring->inflight, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) 	refcount_add(nr, &current->usage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) 	io_submit_state_start(&state, ctx, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) 	for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) 		const struct io_uring_sqe *sqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) 		struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) 		int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) 		sqe = io_get_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) 		if (unlikely(!sqe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) 			io_consume_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) 		req = io_alloc_req(ctx, &state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) 		if (unlikely(!req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) 			if (!submitted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) 				submitted = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) 		io_consume_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) 		/* will complete beyond this point, count as submitted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) 		submitted++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) 		err = io_init_req(ctx, req, sqe, &state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) 		if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) fail_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) 			io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) 			io_req_complete(req, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730) 						true, io_async_submit(ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) 		err = io_submit_sqe(req, sqe, &link, &state.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) 			goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) 	if (unlikely(submitted != nr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) 		int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) 		struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) 		int unused = nr - ref_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) 		percpu_ref_put_many(&ctx->refs, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) 		percpu_counter_sub(&tctx->inflight, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) 		put_task_struct_many(current, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) 	if (link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) 		io_queue_link_head(link, &state.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747) 	io_submit_state_end(&state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) 	io_commit_sqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) 	return submitted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) 	/* Tell userspace we may need a wakeup call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) 	ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) 	ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) 			       int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) 	struct io_ring_ctx *ctx = container_of(wqe, struct io_ring_ctx, sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) 	ret = autoremove_wake_function(wqe, mode, sync, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) 		unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) 		spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) 		ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) 		spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) enum sq_ret {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) 	SQT_IDLE	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) 	SQT_SPIN	= 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) 	SQT_DID_WORK	= 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) 				  unsigned long start_jiffies, bool cap_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) 	unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) 	struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) 	unsigned int to_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) 	if (!list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) 		unsigned nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) 		mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) 		if (!list_empty(&ctx->iopoll_list) && !need_resched())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) 			io_do_iopoll(ctx, &nr_events, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) 		mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) 	to_submit = io_sqring_entries(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) 	 * If submit got -EBUSY, flag us as needing the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) 	 * to enter the kernel to reap and flush events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) 	if (!to_submit || ret == -EBUSY || need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) 		 * Drop cur_mm before scheduling, we can't hold it for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820) 		 * long periods (or over schedule()). Do this before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) 		 * adding ourselves to the waitqueue, as the unuse/drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) 		 * may sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824) 		io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) 		 * We're polling. If we're within the defined idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) 		 * period, then let us spin without work before going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) 		 * to sleep. The exception is if we got EBUSY doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) 		 * more IO, we should wait for the application to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) 		 * reap events and wake us up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) 		if (!list_empty(&ctx->iopoll_list) || need_resched() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) 		    (!time_after(jiffies, timeout) && ret != -EBUSY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) 		    !percpu_ref_is_dying(&ctx->refs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) 			return SQT_SPIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) 		prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) 					TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) 		 * While doing polled IO, before going to sleep, we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) 		 * to check if there are new reqs added to iopoll_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) 		 * it is because reqs may have been punted to io worker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) 		 * and will be added to iopoll_list later, hence check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) 		 * the iopoll_list again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) 		if ((ctx->flags & IORING_SETUP_IOPOLL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) 		    !list_empty_careful(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) 			finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) 			goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) 		to_submit = io_sqring_entries(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) 		if (!to_submit || ret == -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) 			return SQT_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) 	finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) 	io_ring_clear_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) 	/* if we're handling multiple rings, cap submit size for fairness */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) 	if (cap_entries && to_submit > 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) 		to_submit = 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) 	if (likely(!percpu_ref_is_dying(&ctx->refs) && !ctx->sqo_dead))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) 		ret = io_submit_sqes(ctx, to_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) 	if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) 		wake_up(&ctx->sqo_sq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) 	return SQT_DID_WORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) static void io_sqd_init_new(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) 	while (!list_empty(&sqd->ctx_new_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) 		ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) 		init_wait(&ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) 		ctx->sqo_wait_entry.func = io_sq_wake_function;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) 		list_move_tail(&ctx->sqd_list, &sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) 		complete(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) static int io_sq_thread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) 	struct cgroup_subsys_state *cur_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) 	const struct cred *old_cred = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) 	struct io_sq_data *sqd = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) 	unsigned long start_jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) 	start_jiffies = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) 	while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) 		enum sq_ret ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) 		bool cap_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) 		 * Any changes to the sqd lists are synchronized through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905) 		 * kthread parking. This synchronizes the thread vs users,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) 		 * the users are synchronized on the sqd->ctx_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908) 		if (kthread_should_park()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) 			kthread_parkme();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) 			 * When sq thread is unparked, in case the previous park operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) 			 * comes from io_put_sq_data(), which means that sq thread is going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) 			 * to be stopped, so here needs to have a check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915) 			if (kthread_should_stop())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) 		if (unlikely(!list_empty(&sqd->ctx_new_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920) 			io_sqd_init_new(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) 		cap_entries = !list_is_singular(&sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) 			if (current->cred != ctx->creds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926) 				if (old_cred)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) 					revert_creds(old_cred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) 				old_cred = override_creds(ctx->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930) 			io_sq_thread_associate_blkcg(ctx, &cur_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932) 			current->loginuid = ctx->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933) 			current->sessionid = ctx->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936) 			ret |= __io_sq_thread(ctx, start_jiffies, cap_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) 			io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941) 		if (ret & SQT_SPIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942) 			io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) 			io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945) 		} else if (ret == SQT_IDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) 			if (kthread_should_park())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949) 				io_ring_set_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950) 			schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951) 			start_jiffies = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) 				io_ring_clear_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957) 	io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) 	io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) 	if (cur_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961) 		io_sq_thread_unassociate_blkcg();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962) 	if (old_cred)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) 		revert_creds(old_cred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) 	kthread_parkme();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) struct io_wait_queue {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971) 	struct wait_queue_entry wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) 	unsigned to_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) 	unsigned nr_timeouts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) static inline bool io_should_wake(struct io_wait_queue *iowq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979) 	struct io_ring_ctx *ctx = iowq->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982) 	 * Wake up if we have enough events, or if a timeout occurred since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) 	 * started waiting. For timeouts, we always want to return to userspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984) 	 * regardless of event count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) 	return io_cqring_events(ctx) >= iowq->to_wait ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987) 			atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991) 			    int wake_flags, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) 	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994) 							wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) 	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) 	 * the task, and the next invocation will do it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000) 	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) 		return autoremove_wake_function(curr, mode, wake_flags, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) static int io_run_task_work_sig(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) 	if (io_run_task_work())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) 	if (!signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) 	if (current->jobctl & JOBCTL_TASK_WORK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012) 		spin_lock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) 		current->jobctl &= ~JOBCTL_TASK_WORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014) 		recalc_sigpending();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) 		spin_unlock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) 	return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022)  * Wait until events become available, if we don't already have some. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023)  * application must reap them itself, as they reside on the shared cq ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) 			  const sigset_t __user *sig, size_t sigsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028) 	struct io_wait_queue iowq = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029) 		.wq = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) 			.private	= current,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031) 			.func		= io_wake_function,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) 			.entry		= LIST_HEAD_INIT(iowq.wq.entry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) 		},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034) 		.ctx		= ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035) 		.to_wait	= min_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) 	struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042) 		if (io_cqring_events(ctx) >= min_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7044) 		if (!io_run_task_work())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7045) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7046) 	} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7048) 	if (sig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7049) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7050) 		if (in_compat_syscall())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7051) 			ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) 						      sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) 			ret = set_user_sigmask(sig, sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062) 	trace_io_uring_cqring_wait(ctx, min_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065) 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) 						TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) 		/* make sure we run task_work before checking for signals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) 		ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) 		if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070) 			finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7073) 		else if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075) 		if (io_should_wake(&iowq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077) 		if (test_bit(0, &ctx->cq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078) 			finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081) 		schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) 	} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) 	finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085) 	restore_saved_sigmask_unless(ret == -EINTR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087) 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090) static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093) 	if (ctx->ring_sock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094) 		struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) 		struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) 		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098) 			kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) 	for (i = 0; i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104) 		struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) 		file = io_file_from_index(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) 		if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) 			fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) static void io_file_ref_kill(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115) 	struct fixed_file_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117) 	data = container_of(ref, struct fixed_file_data, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) 	complete(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) static void io_sqe_files_set_node(struct fixed_file_data *file_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) 				  struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) 	spin_lock_bh(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125) 	file_data->node = ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) 	list_add_tail(&ref_node->node, &file_data->ref_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) 	spin_unlock_bh(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128) 	percpu_ref_get(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131) static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133) 	struct fixed_file_data *data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7134) 	struct fixed_file_ref_node *backup_node, *ref_node = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7135) 	unsigned nr_tables, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7136) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) 	if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139) 		return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) 	backup_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141) 	if (!backup_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) 	spin_lock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145) 	ref_node = data->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146) 	spin_unlock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) 	if (ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) 		percpu_ref_kill(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) 	percpu_ref_kill(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) 	/* wait for all refs nodes to complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) 	flush_delayed_work(&ctx->file_put_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) 		ret = wait_for_completion_interruptible(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) 		ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160) 			percpu_ref_resurrect(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) 			reinit_completion(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) 			io_sqe_files_set_node(data, backup_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165) 	} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167) 	__io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168) 	nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) 	for (i = 0; i < nr_tables; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) 		kfree(data->table[i].files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171) 	kfree(data->table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) 	percpu_ref_exit(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) 	kfree(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174) 	ctx->file_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) 	ctx->nr_user_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) 	destroy_fixed_file_ref_node(backup_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) static void io_put_sq_data(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) 	if (refcount_dec_and_test(&sqd->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184) 		 * The park is a bit of a work-around, without it we get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185) 		 * warning spews on shutdown with SQPOLL set and affinity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) 		 * set to a single CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) 		if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) 			kthread_park(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190) 			kthread_stop(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) 		kfree(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197) static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7199) 	struct io_ring_ctx *ctx_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) 	struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) 	struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) 	f = fdget(p->wq_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) 	if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) 		return ERR_PTR(-ENXIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206) 	if (f.file->f_op != &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) 		fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) 	ctx_attach = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212) 	sqd = ctx_attach->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) 	if (!sqd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214) 		fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) 	refcount_inc(&sqd->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7219) 	fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7220) 	return sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7223) static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7225) 	struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7227) 	if (p->flags & IORING_SETUP_ATTACH_WQ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7228) 		return io_attach_sq_data(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7230) 	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7231) 	if (!sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7232) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7234) 	refcount_set(&sqd->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7235) 	INIT_LIST_HEAD(&sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7236) 	INIT_LIST_HEAD(&sqd->ctx_new_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7237) 	mutex_init(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7238) 	mutex_init(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7239) 	init_waitqueue_head(&sqd->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7240) 	return sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7243) static void io_sq_thread_unpark(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7244) 	__releases(&sqd->lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7245) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7246) 	if (!sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7247) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7248) 	kthread_unpark(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7249) 	mutex_unlock(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7252) static void io_sq_thread_park(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7253) 	__acquires(&sqd->lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7255) 	if (!sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7256) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7257) 	mutex_lock(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7258) 	kthread_park(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7261) static void io_sq_thread_stop(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7263) 	struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7265) 	if (sqd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7266) 		if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7267) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7268) 			 * We may arrive here from the error branch in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7269) 			 * io_sq_offload_create() where the kthread is created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7270) 			 * without being waked up, thus wake it up now to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7271) 			 * sure the wait will complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7272) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7273) 			wake_up_process(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7274) 			wait_for_completion(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7276) 			io_sq_thread_park(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7277) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7279) 		mutex_lock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7280) 		list_del(&ctx->sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7281) 		mutex_unlock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7283) 		if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7284) 			finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7285) 			io_sq_thread_unpark(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7286) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7288) 		io_put_sq_data(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7289) 		ctx->sq_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7290) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7293) static void io_finish_async(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7294) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7295) 	io_sq_thread_stop(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7297) 	if (ctx->io_wq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7298) 		io_wq_destroy(ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7299) 		ctx->io_wq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7300) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7303) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7305)  * Ensure the UNIX gc is aware of our file set, so we are certain that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7306)  * the io_uring can be safely unregistered on process exit, even if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7307)  * loops in the file referencing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7308)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7309) static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7311) 	struct sock *sk = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7312) 	struct scm_fp_list *fpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7313) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7314) 	int i, nr_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7316) 	fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7317) 	if (!fpl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7318) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7320) 	skb = alloc_skb(0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7321) 	if (!skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7322) 		kfree(fpl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7323) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7324) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7326) 	skb->sk = sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7328) 	nr_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7329) 	fpl->user = get_uid(ctx->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7330) 	for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7331) 		struct file *file = io_file_from_index(ctx, i + offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7333) 		if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7334) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7335) 		fpl->fp[nr_files] = get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7336) 		unix_inflight(fpl->user, fpl->fp[nr_files]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7337) 		nr_files++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7338) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7340) 	if (nr_files) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7341) 		fpl->max = SCM_MAX_FD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7342) 		fpl->count = nr_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7343) 		UNIXCB(skb).fp = fpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7344) 		skb->destructor = unix_destruct_scm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7345) 		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7346) 		skb_queue_head(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7348) 		for (i = 0; i < nr_files; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7349) 			fput(fpl->fp[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7350) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7351) 		kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7352) 		free_uid(fpl->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7353) 		kfree(fpl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7354) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7356) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7360)  * If UNIX sockets are enabled, fd passing can cause a reference cycle which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7361)  * causes regular reference counting to break down. We rely on the UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7362)  * garbage collection to take care of this problem for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7363)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7364) static int io_sqe_files_scm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7366) 	unsigned left, total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7367) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7369) 	total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7370) 	left = ctx->nr_user_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7371) 	while (left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7372) 		unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7374) 		ret = __io_sqe_files_scm(ctx, this_files, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7375) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7376) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7377) 		left -= this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7378) 		total += this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7379) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7381) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7382) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7384) 	while (total < ctx->nr_user_files) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7385) 		struct file *file = io_file_from_index(ctx, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7387) 		if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7388) 			fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7389) 		total++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7390) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7392) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7394) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7395) static int io_sqe_files_scm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7397) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7399) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7401) static int io_sqe_alloc_file_tables(struct fixed_file_data *file_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7402) 				    unsigned nr_tables, unsigned nr_files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7404) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7406) 	for (i = 0; i < nr_tables; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7407) 		struct fixed_file_table *table = &file_data->table[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7408) 		unsigned this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7410) 		this_files = min(nr_files, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7411) 		table->files = kcalloc(this_files, sizeof(struct file *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7412) 					GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7413) 		if (!table->files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7414) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7415) 		nr_files -= this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7416) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7418) 	if (i == nr_tables)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7419) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7421) 	for (i = 0; i < nr_tables; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7422) 		struct fixed_file_table *table = &file_data->table[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7423) 		kfree(table->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7424) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7425) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7428) static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7430) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7431) 	struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7432) 	struct sk_buff_head list, *head = &sock->sk_receive_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7433) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7434) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7436) 	__skb_queue_head_init(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7438) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7439) 	 * Find the skb that holds this file in its SCM_RIGHTS. When found,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7440) 	 * remove this entry and rearrange the file array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7441) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7442) 	skb = skb_dequeue(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7443) 	while (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7444) 		struct scm_fp_list *fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7446) 		fp = UNIXCB(skb).fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7447) 		for (i = 0; i < fp->count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7448) 			int left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7450) 			if (fp->fp[i] != file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7451) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7453) 			unix_notinflight(fp->user, fp->fp[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7454) 			left = fp->count - 1 - i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7455) 			if (left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7456) 				memmove(&fp->fp[i], &fp->fp[i + 1],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7457) 						left * sizeof(struct file *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7458) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7459) 			fp->count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7460) 			if (!fp->count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7461) 				kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7462) 				skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7463) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7464) 				__skb_queue_tail(&list, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7465) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7466) 			fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7467) 			file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7468) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7469) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7471) 		if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7472) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7474) 		__skb_queue_tail(&list, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7476) 		skb = skb_dequeue(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7477) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7479) 	if (skb_peek(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7480) 		spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7481) 		while ((skb = __skb_dequeue(&list)) != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7482) 			__skb_queue_tail(head, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7483) 		spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7484) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7485) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7486) 	fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7487) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7489) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7490) struct io_file_put {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7491) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7492) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7493) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7495) static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7497) 	struct fixed_file_data *file_data = ref_node->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7498) 	struct io_ring_ctx *ctx = file_data->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7499) 	struct io_file_put *pfile, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7501) 	list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7502) 		list_del(&pfile->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7503) 		io_ring_file_put(ctx, pfile->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7504) 		kfree(pfile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7505) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7507) 	percpu_ref_exit(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7508) 	kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7509) 	percpu_ref_put(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7511) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7512) static void io_file_put_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7514) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7515) 	struct llist_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7517) 	ctx = container_of(work, struct io_ring_ctx, file_put_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7518) 	node = llist_del_all(&ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7520) 	while (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7521) 		struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7522) 		struct llist_node *next = node->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7524) 		ref_node = llist_entry(node, struct fixed_file_ref_node, llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7525) 		__io_file_put_work(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7526) 		node = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7530) static void io_file_data_ref_zero(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7532) 	struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7533) 	struct fixed_file_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7534) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7535) 	bool first_add = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7536) 	int delay = HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7538) 	ref_node = container_of(ref, struct fixed_file_ref_node, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7539) 	data = ref_node->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7540) 	ctx = data->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7542) 	spin_lock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7543) 	ref_node->done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7545) 	while (!list_empty(&data->ref_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7546) 		ref_node = list_first_entry(&data->ref_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7547) 					struct fixed_file_ref_node, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7548) 		/* recycle ref nodes in order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7549) 		if (!ref_node->done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7550) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7551) 		list_del(&ref_node->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7552) 		first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7553) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7554) 	spin_unlock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7556) 	if (percpu_ref_is_dying(&data->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7557) 		delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7558) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7559) 	if (!delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7560) 		mod_delayed_work(system_wq, &ctx->file_put_work, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7561) 	else if (first_add)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7562) 		queue_delayed_work(system_wq, &ctx->file_put_work, delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7565) static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7566) 			struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7568) 	struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7570) 	ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7571) 	if (!ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7572) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7574) 	if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7575) 			    0, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7576) 		kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7577) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7578) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7579) 	INIT_LIST_HEAD(&ref_node->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7580) 	INIT_LIST_HEAD(&ref_node->file_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7581) 	ref_node->file_data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7582) 	ref_node->done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7583) 	return ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7586) static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7588) 	percpu_ref_exit(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7589) 	kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7592) static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7593) 				 unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7595) 	__s32 __user *fds = (__s32 __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7596) 	unsigned nr_tables, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7597) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7598) 	int fd, ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7599) 	struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7600) 	struct fixed_file_data *file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7602) 	if (ctx->file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7603) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7604) 	if (!nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7605) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7606) 	if (nr_args > IORING_MAX_FIXED_FILES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7607) 		return -EMFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7608) 	if (nr_args > rlimit(RLIMIT_NOFILE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7609) 		return -EMFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7611) 	file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7612) 	if (!file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7613) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7614) 	file_data->ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7615) 	init_completion(&file_data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7616) 	INIT_LIST_HEAD(&file_data->ref_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7617) 	spin_lock_init(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7619) 	nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7620) 	file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7621) 				   GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7622) 	if (!file_data->table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7623) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7625) 	if (percpu_ref_init(&file_data->refs, io_file_ref_kill,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7626) 				PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7627) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7629) 	if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7630) 		goto out_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7631) 	ctx->file_data = file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7633) 	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7634) 		struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7635) 		unsigned index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7637) 		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7638) 			ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7639) 			goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7640) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7641) 		/* allow sparse sets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7642) 		if (fd == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7643) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7645) 		file = fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7646) 		ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7647) 		if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7648) 			goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7650) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7651) 		 * Don't allow io_uring instances to be registered. If UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7652) 		 * isn't enabled, then this causes a reference cycle and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7653) 		 * instance can never get freed. If UNIX is enabled we'll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7654) 		 * handle it just fine, but there's still no point in allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7655) 		 * a ring fd as it doesn't support regular read/write anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7656) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7657) 		if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7658) 			fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7659) 			goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7660) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7661) 		table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7662) 		index = i & IORING_FILE_TABLE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7663) 		table->files[index] = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7664) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7666) 	ret = io_sqe_files_scm(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7667) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7668) 		io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7669) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7670) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7672) 	ref_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7673) 	if (!ref_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7674) 		io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7675) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7676) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7678) 	io_sqe_files_set_node(file_data, ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7679) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7680) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7681) 	for (i = 0; i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7682) 		file = io_file_from_index(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7683) 		if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7684) 			fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7685) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7686) 	for (i = 0; i < nr_tables; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7687) 		kfree(file_data->table[i].files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7688) 	ctx->nr_user_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7689) out_ref:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7690) 	percpu_ref_exit(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7691) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7692) 	kfree(file_data->table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7693) 	kfree(file_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7694) 	ctx->file_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7695) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7698) static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7699) 				int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7701) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7702) 	struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7703) 	struct sk_buff_head *head = &sock->sk_receive_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7704) 	struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7706) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7707) 	 * See if we can merge this file into an existing skb SCM_RIGHTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7708) 	 * file set. If there's no room, fall back to allocating a new skb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7709) 	 * and filling it in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7710) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7711) 	spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7712) 	skb = skb_peek(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7713) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7714) 		struct scm_fp_list *fpl = UNIXCB(skb).fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7716) 		if (fpl->count < SCM_MAX_FD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7717) 			__skb_unlink(skb, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7718) 			spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7719) 			fpl->fp[fpl->count] = get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7720) 			unix_inflight(fpl->user, fpl->fp[fpl->count]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7721) 			fpl->count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7722) 			spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7723) 			__skb_queue_head(head, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7724) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7725) 			skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7726) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7727) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7728) 	spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7730) 	if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7731) 		fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7732) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7733) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7735) 	return __io_sqe_files_scm(ctx, 1, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7736) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7737) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7738) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7741) static int io_queue_file_removal(struct fixed_file_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7742) 				 struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7744) 	struct io_file_put *pfile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7745) 	struct fixed_file_ref_node *ref_node = data->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7747) 	pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7748) 	if (!pfile)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7749) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7751) 	pfile->file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7752) 	list_add(&pfile->list, &ref_node->file_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7754) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7757) static int __io_sqe_files_update(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7758) 				 struct io_uring_files_update *up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7759) 				 unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7760) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7761) 	struct fixed_file_data *data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7762) 	struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7763) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7764) 	__s32 __user *fds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7765) 	int fd, i, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7766) 	__u32 done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7767) 	bool needs_switch = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7769) 	if (check_add_overflow(up->offset, nr_args, &done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7770) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7771) 	if (done > ctx->nr_user_files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7772) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7774) 	ref_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7775) 	if (!ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7776) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7778) 	done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7779) 	fds = u64_to_user_ptr(up->fds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7780) 	while (nr_args) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7781) 		struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7782) 		unsigned index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7784) 		err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7785) 		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7786) 			err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7787) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7788) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7789) 		i = array_index_nospec(up->offset, ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7790) 		table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7791) 		index = i & IORING_FILE_TABLE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7792) 		if (table->files[index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7793) 			file = table->files[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7794) 			err = io_queue_file_removal(data, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7795) 			if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7796) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7797) 			table->files[index] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7798) 			needs_switch = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7799) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7800) 		if (fd != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7801) 			file = fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7802) 			if (!file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7803) 				err = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7804) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7805) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7806) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7807) 			 * Don't allow io_uring instances to be registered. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7808) 			 * UNIX isn't enabled, then this causes a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7809) 			 * cycle and this instance can never get freed. If UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7810) 			 * is enabled we'll handle it just fine, but there's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7811) 			 * still no point in allowing a ring fd as it doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7812) 			 * support regular read/write anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7813) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7814) 			if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7815) 				fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7816) 				err = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7817) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7818) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7819) 			table->files[index] = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7820) 			err = io_sqe_file_register(ctx, file, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7821) 			if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7822) 				table->files[index] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7823) 				fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7824) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7825) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7826) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7827) 		nr_args--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7828) 		done++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7829) 		up->offset++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7830) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7832) 	if (needs_switch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7833) 		percpu_ref_kill(&data->node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7834) 		io_sqe_files_set_node(data, ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7835) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7836) 		destroy_fixed_file_ref_node(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7838) 	return done ? done : err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7841) static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7842) 			       unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7844) 	struct io_uring_files_update up;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7846) 	if (!ctx->file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7847) 		return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7848) 	if (!nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7849) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7850) 	if (copy_from_user(&up, arg, sizeof(up)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7851) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7852) 	if (up.resv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7853) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7855) 	return __io_sqe_files_update(ctx, &up, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7858) static void io_free_work(struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7860) 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7862) 	/* Consider that io_steal_work() relies on this ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7863) 	io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7866) static int io_init_wq_offload(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7867) 			      struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7869) 	struct io_wq_data data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7870) 	struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7871) 	struct io_ring_ctx *ctx_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7872) 	unsigned int concurrency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7873) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7875) 	data.user = ctx->user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7876) 	data.free_work = io_free_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7877) 	data.do_work = io_wq_submit_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7879) 	if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7880) 		/* Do QD, or 4 * CPUS, whatever is smallest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7881) 		concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7883) 		ctx->io_wq = io_wq_create(concurrency, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7884) 		if (IS_ERR(ctx->io_wq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7885) 			ret = PTR_ERR(ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7886) 			ctx->io_wq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7887) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7888) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7889) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7891) 	f = fdget(p->wq_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7892) 	if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7893) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7895) 	if (f.file->f_op != &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7896) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7897) 		goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7898) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7900) 	ctx_attach = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7901) 	/* @io_wq is protected by holding the fd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7902) 	if (!io_wq_get(ctx_attach->io_wq, &data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7903) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7904) 		goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7905) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7907) 	ctx->io_wq = ctx_attach->io_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7908) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7909) 	fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7910) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7913) static int io_uring_alloc_task_context(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7915) 	struct io_uring_task *tctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7916) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7918) 	tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7919) 	if (unlikely(!tctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7920) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7922) 	ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7923) 	if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7924) 		kfree(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7925) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7926) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7928) 	xa_init(&tctx->xa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7929) 	init_waitqueue_head(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7930) 	tctx->last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7931) 	atomic_set(&tctx->in_idle, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7932) 	tctx->sqpoll = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7933) 	io_init_identity(&tctx->__identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7934) 	tctx->identity = &tctx->__identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7935) 	task->io_uring = tctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7936) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7939) void __io_uring_free(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7941) 	struct io_uring_task *tctx = tsk->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7943) 	WARN_ON_ONCE(!xa_empty(&tctx->xa));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7944) 	WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7945) 	if (tctx->identity != &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7946) 		kfree(tctx->identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7947) 	percpu_counter_destroy(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7948) 	kfree(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7949) 	tsk->io_uring = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7951) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7952) static int io_sq_offload_create(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7953) 				struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7955) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7957) 	if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7958) 		struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7960) 		ret = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7961) 		if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7962) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7964) 		sqd = io_get_sq_data(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7965) 		if (IS_ERR(sqd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7966) 			ret = PTR_ERR(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7967) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7968) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7970) 		ctx->sq_data = sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7971) 		io_sq_thread_park(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7972) 		mutex_lock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7973) 		list_add(&ctx->sqd_list, &sqd->ctx_new_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7974) 		mutex_unlock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7975) 		io_sq_thread_unpark(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7977) 		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7978) 		if (!ctx->sq_thread_idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7979) 			ctx->sq_thread_idle = HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7981) 		if (sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7982) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7984) 		if (p->flags & IORING_SETUP_SQ_AFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7985) 			int cpu = p->sq_thread_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7986) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7987) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7988) 			if (cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7989) 				goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7990) 			if (!cpu_online(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7991) 				goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7993) 			sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7994) 							cpu, "io_uring-sq");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7995) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7996) 			sqd->thread = kthread_create(io_sq_thread, sqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7997) 							"io_uring-sq");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7998) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7999) 		if (IS_ERR(sqd->thread)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8000) 			ret = PTR_ERR(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8001) 			sqd->thread = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8002) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8003) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8004) 		ret = io_uring_alloc_task_context(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8005) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8006) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8007) 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8008) 		/* Can't have SQ_AFF without SQPOLL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8009) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8010) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8011) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8013) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8014) 	ret = io_init_wq_offload(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8015) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8016) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8018) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8019) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8020) 	io_finish_async(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8021) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8024) static void io_sq_offload_start(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8026) 	struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8028) 	ctx->flags &= ~IORING_SETUP_R_DISABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8029) 	if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd && sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8030) 		wake_up_process(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8033) static inline void __io_unaccount_mem(struct user_struct *user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8034) 				      unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8036) 	atomic_long_sub(nr_pages, &user->locked_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8037) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8039) static inline int __io_account_mem(struct user_struct *user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8040) 				   unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8042) 	unsigned long page_limit, cur_pages, new_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8044) 	/* Don't allow more pages than we can safely lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8045) 	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8047) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8048) 		cur_pages = atomic_long_read(&user->locked_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8049) 		new_pages = cur_pages + nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8050) 		if (new_pages > page_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8051) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8052) 	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8053) 					new_pages) != cur_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8054) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8055) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8058) static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8059) 			     enum io_mem_account acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8061) 	if (ctx->limit_mem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8062) 		__io_unaccount_mem(ctx->user, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8064) 	if (ctx->mm_account) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8065) 		if (acct == ACCT_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8066) 			ctx->mm_account->locked_vm -= nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8067) 		else if (acct == ACCT_PINNED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8068) 			atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8069) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8072) static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8073) 			  enum io_mem_account acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8075) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8076) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8077) 	if (ctx->limit_mem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8078) 		ret = __io_account_mem(ctx->user, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8079) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8080) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8081) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8083) 	if (ctx->mm_account) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8084) 		if (acct == ACCT_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8085) 			ctx->mm_account->locked_vm += nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8086) 		else if (acct == ACCT_PINNED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8087) 			atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8088) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8090) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8093) static void io_mem_free(void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8095) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8097) 	if (!ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8098) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8100) 	page = virt_to_head_page(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8101) 	if (put_page_testzero(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8102) 		free_compound_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8105) static void *io_mem_alloc(size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8107) 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8108) 				__GFP_NORETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8110) 	return (void *) __get_free_pages(gfp_flags, get_order(size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8113) static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8114) 				size_t *sq_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8116) 	struct io_rings *rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8117) 	size_t off, sq_array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8119) 	off = struct_size(rings, cqes, cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8120) 	if (off == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8121) 		return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8122) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8123) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8124) 	off = ALIGN(off, SMP_CACHE_BYTES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8125) 	if (off == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8126) 		return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8127) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8129) 	if (sq_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8130) 		*sq_offset = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8132) 	sq_array_size = array_size(sizeof(u32), sq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8133) 	if (sq_array_size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8134) 		return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8136) 	if (check_add_overflow(off, sq_array_size, &off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8137) 		return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8139) 	return off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8142) static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8144) 	size_t pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8146) 	pages = (size_t)1 << get_order(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8147) 		rings_size(sq_entries, cq_entries, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8148) 	pages += (size_t)1 << get_order(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8149) 		array_size(sizeof(struct io_uring_sqe), sq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8151) 	return pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8154) static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8156) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8158) 	if (!ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8159) 		return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8161) 	for (i = 0; i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8162) 		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8164) 		for (j = 0; j < imu->nr_bvecs; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8165) 			unpin_user_page(imu->bvec[j].bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8167) 		if (imu->acct_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8168) 			io_unaccount_mem(ctx, imu->acct_pages, ACCT_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8169) 		kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8170) 		imu->nr_bvecs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8171) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8173) 	kfree(ctx->user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8174) 	ctx->user_bufs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8175) 	ctx->nr_user_bufs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8176) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8179) static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8180) 		       void __user *arg, unsigned index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8182) 	struct iovec __user *src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8184) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8185) 	if (ctx->compat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8186) 		struct compat_iovec __user *ciovs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8187) 		struct compat_iovec ciov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8189) 		ciovs = (struct compat_iovec __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8190) 		if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8191) 			return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8193) 		dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8194) 		dst->iov_len = ciov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8195) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8196) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8197) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8198) 	src = (struct iovec __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8199) 	if (copy_from_user(dst, &src[index], sizeof(*dst)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8200) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8201) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8205)  * Not super efficient, but this is just a registration time. And we do cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8206)  * the last compound head, so generally we'll only do a full search if we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8207)  * match that one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8208)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8209)  * We check if the given compound head page has already been accounted, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8210)  * avoid double accounting it. This allows us to account the full size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8211)  * page, not just the constituent pages of a huge page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8212)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8213) static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8214) 				  int nr_pages, struct page *hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8216) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8218) 	/* check current page array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8219) 	for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8220) 		if (!PageCompound(pages[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8221) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8222) 		if (compound_head(pages[i]) == hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8223) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8224) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8226) 	/* check previously registered pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8227) 	for (i = 0; i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8228) 		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8230) 		for (j = 0; j < imu->nr_bvecs; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8231) 			if (!PageCompound(imu->bvec[j].bv_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8232) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8233) 			if (compound_head(imu->bvec[j].bv_page) == hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8234) 				return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8235) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8236) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8238) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8241) static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8242) 				 int nr_pages, struct io_mapped_ubuf *imu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8243) 				 struct page **last_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8245) 	int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8247) 	for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8248) 		if (!PageCompound(pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8249) 			imu->acct_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8250) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8251) 			struct page *hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8253) 			hpage = compound_head(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8254) 			if (hpage == *last_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8255) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8256) 			*last_hpage = hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8257) 			if (headpage_already_acct(ctx, pages, i, hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8258) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8259) 			imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8260) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8263) 	if (!imu->acct_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8264) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8266) 	ret = io_account_mem(ctx, imu->acct_pages, ACCT_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8267) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8268) 		imu->acct_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8269) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8272) static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8273) 				  unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8275) 	struct vm_area_struct **vmas = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8276) 	struct page **pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8277) 	struct page *last_hpage = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8278) 	int i, j, got_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8279) 	int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8281) 	if (ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8282) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8283) 	if (!nr_args || nr_args > UIO_MAXIOV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8284) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8286) 	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8287) 					GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8288) 	if (!ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8289) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8291) 	for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8292) 		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8293) 		unsigned long off, start, end, ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8294) 		int pret, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8295) 		struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8296) 		size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8297) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8298) 		ret = io_copy_iov(ctx, &iov, arg, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8299) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8300) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8301) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8302) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8303) 		 * Don't impose further limits on the size and buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8304) 		 * constraints here, we'll -EINVAL later when IO is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8305) 		 * submitted if they are wrong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8306) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8307) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8308) 		if (!iov.iov_base || !iov.iov_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8309) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8311) 		/* arbitrary limit, but we need something */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8312) 		if (iov.iov_len > SZ_1G)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8313) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8315) 		ubuf = (unsigned long) iov.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8316) 		end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8317) 		start = ubuf >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8318) 		nr_pages = end - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8320) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8321) 		if (!pages || nr_pages > got_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8322) 			kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8323) 			kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8324) 			pages = kvmalloc_array(nr_pages, sizeof(struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8325) 						GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8326) 			vmas = kvmalloc_array(nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8327) 					sizeof(struct vm_area_struct *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8328) 					GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8329) 			if (!pages || !vmas) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8330) 				ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8331) 				goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8332) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8333) 			got_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8334) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8336) 		imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8337) 						GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8338) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8339) 		if (!imu->bvec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8340) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8342) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8343) 		mmap_read_lock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8344) 		pret = pin_user_pages(ubuf, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8345) 				      FOLL_WRITE | FOLL_LONGTERM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8346) 				      pages, vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8347) 		if (pret == nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8348) 			/* don't support file backed memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8349) 			for (j = 0; j < nr_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8350) 				struct vm_area_struct *vma = vmas[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8352) 				if (vma->vm_file &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8353) 				    !is_file_hugepages(vma->vm_file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8354) 					ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8355) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8356) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8357) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8358) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8359) 			ret = pret < 0 ? pret : -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8360) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8361) 		mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8362) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8363) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8364) 			 * if we did partial map, or found file backed vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8365) 			 * release any pages we did get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8366) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8367) 			if (pret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8368) 				unpin_user_pages(pages, pret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8369) 			kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8370) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8371) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8373) 		ret = io_buffer_account_pin(ctx, pages, pret, imu, &last_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8374) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8375) 			unpin_user_pages(pages, pret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8376) 			kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8377) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8378) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8380) 		off = ubuf & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8381) 		size = iov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8382) 		for (j = 0; j < nr_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8383) 			size_t vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8385) 			vec_len = min_t(size_t, size, PAGE_SIZE - off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8386) 			imu->bvec[j].bv_page = pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8387) 			imu->bvec[j].bv_len = vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8388) 			imu->bvec[j].bv_offset = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8389) 			off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8390) 			size -= vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8391) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8392) 		/* store original address for later verification */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8393) 		imu->ubuf = ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8394) 		imu->len = iov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8395) 		imu->nr_bvecs = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8397) 		ctx->nr_user_bufs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8398) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8399) 	kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8400) 	kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8401) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8402) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8403) 	kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8404) 	kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8405) 	io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8406) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8409) static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8411) 	__s32 __user *fds = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8412) 	int fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8414) 	if (ctx->cq_ev_fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8415) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8417) 	if (copy_from_user(&fd, fds, sizeof(*fds)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8418) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8420) 	ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8421) 	if (IS_ERR(ctx->cq_ev_fd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8422) 		int ret = PTR_ERR(ctx->cq_ev_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8423) 		ctx->cq_ev_fd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8424) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8425) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8427) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8430) static int io_eventfd_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8431) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8432) 	if (ctx->cq_ev_fd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8433) 		eventfd_ctx_put(ctx->cq_ev_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8434) 		ctx->cq_ev_fd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8435) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8436) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8438) 	return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8441) static void io_destroy_buffers(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8443) 	struct io_buffer *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8444) 	unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8446) 	xa_for_each(&ctx->io_buffers, index, buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8447) 		__io_remove_buffers(ctx, buf, index, -1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8450) static void io_ring_ctx_free(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8451) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8452) 	io_finish_async(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8453) 	io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8455) 	if (ctx->sqo_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8456) 		put_task_struct(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8457) 		ctx->sqo_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8458) 		mmdrop(ctx->mm_account);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8459) 		ctx->mm_account = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8460) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8462) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8463) 	if (ctx->sqo_blkcg_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8464) 		css_put(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8465) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8467) 	io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8468) 	io_eventfd_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8469) 	io_destroy_buffers(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8471) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8472) 	if (ctx->ring_sock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8473) 		ctx->ring_sock->file = NULL; /* so that iput() is called */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8474) 		sock_release(ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8475) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8476) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8478) 	io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8479) 	io_mem_free(ctx->sq_sqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8481) 	percpu_ref_exit(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8482) 	free_uid(ctx->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8483) 	put_cred(ctx->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8484) 	kfree(ctx->cancel_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8485) 	kmem_cache_free(req_cachep, ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8486) 	kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8489) static __poll_t io_uring_poll(struct file *file, poll_table *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8491) 	struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8492) 	__poll_t mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8494) 	poll_wait(file, &ctx->cq_wait, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8495) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8496) 	 * synchronizes with barrier from wq_has_sleeper call in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8497) 	 * io_commit_cqring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8498) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8499) 	smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8500) 	if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8501) 		mask |= EPOLLOUT | EPOLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8503) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8504) 	 * Don't flush cqring overflow list here, just do a simple check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8505) 	 * Otherwise there could possible be ABBA deadlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8506) 	 *      CPU0                    CPU1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8507) 	 *      ----                    ----
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8508) 	 * lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8509) 	 *                              lock(&ep->mtx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8510) 	 *                              lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8511) 	 * lock(&ep->mtx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8512) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8513) 	 * Users may get EPOLLIN meanwhile seeing nothing in cqring, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8514) 	 * pushs them to do the flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8515) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8516) 	if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8517) 		mask |= EPOLLIN | EPOLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8519) 	return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8522) static int io_uring_fasync(int fd, struct file *file, int on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8524) 	struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8526) 	return fasync_helper(fd, file, on, &ctx->cq_fasync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8529) static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8531) 	struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8533) 	iod = xa_erase(&ctx->personalities, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8534) 	if (iod) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8535) 		put_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8536) 		if (refcount_dec_and_test(&iod->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8537) 			kfree(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8538) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8539) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8541) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8544) static void io_ring_exit_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8546) 	struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8547) 					       exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8549) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8550) 	 * If we're doing polled IO and end up having requests being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8551) 	 * submitted async (out-of-line), then completions can come in while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8552) 	 * we're waiting for refs to drop. We need to reap these manually,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8553) 	 * as nobody else will be looking for them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8554) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8555) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8556) 		io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8557) 	} while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8558) 	io_ring_ctx_free(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8561) static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8563) 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8565) 	return req->ctx == data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8568) static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8570) 	unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8571) 	struct io_identify *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8573) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8574) 	percpu_ref_kill(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8575) 	/* if force is set, the ring is going away. always drop after that */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8577) 	if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8578) 		ctx->sqo_dead = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8580) 	ctx->cq_overflow_flushed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8581) 	if (ctx->rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8582) 		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8583) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8585) 	io_kill_timeouts(ctx, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8586) 	io_poll_remove_all(ctx, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8588) 	if (ctx->io_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8589) 		io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8591) 	/* if we failed setting up the ctx, we might not have any rings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8592) 	io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8593) 	xa_for_each(&ctx->personalities, index, iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8594) 		 io_unregister_personality(ctx, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8596) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8597) 	 * Do this upfront, so we won't have a grace period where the ring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8598) 	 * is closed but resources aren't reaped yet. This can cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8599) 	 * spurious failure in setting up a new ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8600) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8601) 	io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8602) 			 ACCT_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8604) 	INIT_WORK(&ctx->exit_work, io_ring_exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8605) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8606) 	 * Use system_unbound_wq to avoid spawning tons of event kworkers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8607) 	 * if we're exiting a ton of rings at the same time. It just adds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8608) 	 * noise and overhead, there's no discernable change in runtime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8609) 	 * over using system_wq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8610) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8611) 	queue_work(system_unbound_wq, &ctx->exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8614) static int io_uring_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8616) 	struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8618) 	file->private_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8619) 	io_ring_ctx_wait_and_kill(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8620) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8623) struct io_task_cancel {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8624) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8625) 	struct files_struct *files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8626) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8628) static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8630) 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8631) 	struct io_task_cancel *cancel = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8632) 	bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8634) 	if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8635) 		unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8636) 		struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8638) 		/* protect against races with linked timeouts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8639) 		spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8640) 		ret = io_match_task(req, cancel->task, cancel->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8641) 		spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8642) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8643) 		ret = io_match_task(req, cancel->task, cancel->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8644) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8645) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8648) static void io_cancel_defer_files(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8649) 				  struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8650) 				  struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8652) 	struct io_defer_entry *de = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8653) 	LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8655) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8656) 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8657) 		if (io_match_task(de->req, task, files)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8658) 			list_cut_position(&list, &ctx->defer_list, &de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8659) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8660) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8661) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8662) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8664) 	while (!list_empty(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8665) 		de = list_first_entry(&list, struct io_defer_entry, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8666) 		list_del_init(&de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8667) 		req_set_fail_links(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8668) 		io_put_req(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8669) 		io_req_complete(de->req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8670) 		kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8671) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8674) static int io_uring_count_inflight(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8675) 				   struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8676) 				   struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8678) 	struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8679) 	int cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8681) 	spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8682) 	list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8683) 		cnt += io_match_task(req, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8684) 	spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8685) 	return cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8688) static void io_uring_cancel_files(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8689) 				  struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8690) 				  struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8692) 	while (!list_empty_careful(&ctx->inflight_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8693) 		struct io_task_cancel cancel = { .task = task, .files = files };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8694) 		DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8695) 		int inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8697) 		inflight = io_uring_count_inflight(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8698) 		if (!inflight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8699) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8701) 		io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8702) 		io_poll_remove_all(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8703) 		io_kill_timeouts(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8704) 		/* cancellations _may_ trigger task work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8705) 		io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8707) 		prepare_to_wait(&task->io_uring->wait, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8708) 				TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8709) 		if (inflight == io_uring_count_inflight(ctx, task, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8710) 			schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8711) 		finish_wait(&task->io_uring->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8712) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8715) static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8716) 					    struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8718) 	while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8719) 		struct io_task_cancel cancel = { .task = task, .files = NULL, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8720) 		enum io_wq_cancel cret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8721) 		bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8722) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8723) 		cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8724) 		if (cret != IO_WQ_CANCEL_NOTFOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8725) 			ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8727) 		/* SQPOLL thread does its own polling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8728) 		if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8729) 			while (!list_empty_careful(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8730) 				io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8731) 				ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8732) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8733) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8735) 		ret |= io_poll_remove_all(ctx, task, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8736) 		ret |= io_kill_timeouts(ctx, task, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8737) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8738) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8739) 		io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8740) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8741) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8744) static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8746) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8747) 	ctx->sqo_dead = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8748) 	if (ctx->flags & IORING_SETUP_R_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8749) 		io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8750) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8752) 	/* make sure callers enter the ring to get error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8753) 	if (ctx->rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8754) 		io_ring_set_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8758)  * We need to iteratively cancel requests, in case a request has dependent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8759)  * hard links. These persist even for failure of cancelations, hence keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8760)  * looping until none are found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8761)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8762) static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8763) 					  struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8765) 	struct task_struct *task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8767) 	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8768) 		io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8769) 		task = ctx->sq_data->thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8770) 		atomic_inc(&task->io_uring->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8771) 		io_sq_thread_park(ctx->sq_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8772) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8774) 	io_cancel_defer_files(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8775) 	io_cqring_overflow_flush(ctx, true, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8777) 	if (!files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8778) 		__io_uring_cancel_task_requests(ctx, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8779) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8780) 		io_uring_cancel_files(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8782) 	if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8783) 		atomic_dec(&task->io_uring->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8784) 		io_sq_thread_unpark(ctx->sq_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8785) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8788) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8789)  * Note that this task has used io_uring. We use it for cancelation purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8790)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8791) static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8792) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8793) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8794) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8796) 	if (unlikely(!tctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8797) 		ret = io_uring_alloc_task_context(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8798) 		if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8799) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8800) 		tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8801) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8802) 	if (tctx->last != file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8803) 		void *old = xa_load(&tctx->xa, (unsigned long)file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8805) 		if (!old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8806) 			get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8807) 			ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8808) 						file, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8809) 			if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8810) 				fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8811) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8812) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8813) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8814) 		tctx->last = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8815) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8817) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8818) 	 * This is race safe in that the task itself is doing this, hence it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8819) 	 * cannot be going through the exit/cancel paths at the same time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8820) 	 * This cannot be modified while exit/cancel is running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8821) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8822) 	if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8823) 		tctx->sqpoll = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8825) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8829)  * Remove this io_uring_file -> task mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8830)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8831) static void io_uring_del_task_file(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8833) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8835) 	if (tctx->last == file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8836) 		tctx->last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8837) 	file = xa_erase(&tctx->xa, (unsigned long)file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8838) 	if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8839) 		fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8842) static void io_uring_remove_task_files(struct io_uring_task *tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8844) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8845) 	unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8847) 	xa_for_each(&tctx->xa, index, file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8848) 		io_uring_del_task_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8851) void __io_uring_files_cancel(struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8853) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8854) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8855) 	unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8857) 	/* make sure overflow events are dropped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8858) 	atomic_inc(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8859) 	xa_for_each(&tctx->xa, index, file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8860) 		io_uring_cancel_task_requests(file->private_data, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8861) 	atomic_dec(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8863) 	if (files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8864) 		io_uring_remove_task_files(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8867) static s64 tctx_inflight(struct io_uring_task *tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8869) 	unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8870) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8871) 	s64 inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8873) 	inflight = percpu_counter_sum(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8874) 	if (!tctx->sqpoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8875) 		return inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8877) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8878) 	 * If we have SQPOLL rings, then we need to iterate and find them, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8879) 	 * add the pending count for those.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8880) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8881) 	xa_for_each(&tctx->xa, index, file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8882) 		struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8884) 		if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8885) 			struct io_uring_task *__tctx = ctx->sqo_task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8887) 			inflight += percpu_counter_sum(&__tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8888) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8889) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8891) 	return inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8894) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8895)  * Find any io_uring fd that this task has registered or done IO on, and cancel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8896)  * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8897)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8898) void __io_uring_task_cancel(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8900) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8901) 	DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8902) 	s64 inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8904) 	/* make sure overflow events are dropped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8905) 	atomic_inc(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8907) 	/* trigger io_disable_sqo_submit() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8908) 	if (tctx->sqpoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8909) 		__io_uring_files_cancel(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8911) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8912) 		/* read completions before cancelations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8913) 		inflight = tctx_inflight(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8914) 		if (!inflight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8915) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8916) 		__io_uring_files_cancel(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8918) 		prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8920) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8921) 		 * If we've seen completions, retry without waiting. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8922) 		 * avoids a race where a completion comes in before we did
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8923) 		 * prepare_to_wait().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8924) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8925) 		if (inflight == tctx_inflight(tctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8926) 			schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8927) 		finish_wait(&tctx->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8928) 	} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8930) 	atomic_dec(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8932) 	io_uring_remove_task_files(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8935) static int io_uring_flush(struct file *file, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8937) 	struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8938) 	struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8940) 	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8941) 		io_uring_cancel_task_requests(ctx, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8943) 	if (!tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8944) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8946) 	/* we should have cancelled and erased it before PF_EXITING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8947) 	WARN_ON_ONCE((current->flags & PF_EXITING) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8948) 		     xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8950) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8951) 	 * fput() is pending, will be 2 if the only other ref is our potential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8952) 	 * task file note. If the task is exiting, drop regardless of count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8953) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8954) 	if (atomic_long_read(&file->f_count) != 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8955) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8957) 	if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8958) 		/* there is only one file note, which is owned by sqo_task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8959) 		WARN_ON_ONCE(ctx->sqo_task != current &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8960) 			     xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8961) 		/* sqo_dead check is for when this happens after cancellation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8962) 		WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8963) 			     !xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8965) 		io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8966) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8968) 	if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8969) 		io_uring_del_task_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8970) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8972) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8973) static void *io_uring_validate_mmap_request(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8974) 					    loff_t pgoff, size_t sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8976) 	struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8977) 	loff_t offset = pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8978) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8979) 	void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8981) 	switch (offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8982) 	case IORING_OFF_SQ_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8983) 	case IORING_OFF_CQ_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8984) 		ptr = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8985) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8986) 	case IORING_OFF_SQES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8987) 		ptr = ctx->sq_sqes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8988) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8989) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8990) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8991) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8993) 	page = virt_to_head_page(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8994) 	if (sz > page_size(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8995) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8997) 	return ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9000) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9001) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9002) static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9004) 	size_t sz = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9005) 	unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9006) 	void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9007) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9008) 	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9009) 	if (IS_ERR(ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9010) 		return PTR_ERR(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9012) 	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9013) 	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9016) #else /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9018) static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9019) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9020) 	return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9023) static unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9025) 	return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9028) static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9029) 	unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9030) 	unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9032) 	void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9034) 	ptr = io_uring_validate_mmap_request(file, pgoff, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9035) 	if (IS_ERR(ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9036) 		return PTR_ERR(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9038) 	return (unsigned long) ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9041) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9043) static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9045) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9046) 	DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9048) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9049) 		if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9050) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9052) 		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9054) 		if (unlikely(ctx->sqo_dead)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9055) 			ret = -EOWNERDEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9056) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9057) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9059) 		if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9060) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9062) 		schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9063) 	} while (!signal_pending(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9065) 	finish_wait(&ctx->sqo_sq_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9066) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9067) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9070) SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9071) 		u32, min_complete, u32, flags, const sigset_t __user *, sig,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9072) 		size_t, sigsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9074) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9075) 	long ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9076) 	int submitted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9077) 	struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9079) 	io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9081) 	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9082) 			IORING_ENTER_SQ_WAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9083) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9085) 	f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9086) 	if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9087) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9089) 	ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9090) 	if (f.file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9091) 		goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9093) 	ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9094) 	ctx = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9095) 	if (!percpu_ref_tryget(&ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9096) 		goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9098) 	ret = -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9099) 	if (ctx->flags & IORING_SETUP_R_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9100) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9102) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9103) 	 * For SQ polling, the thread will do all submissions and completions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9104) 	 * Just return the requested submit count, and wake the thread if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9105) 	 * we were asked to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9106) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9107) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9108) 	if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9109) 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9111) 		if (unlikely(ctx->sqo_dead)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9112) 			ret = -EOWNERDEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9113) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9114) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9115) 		if (flags & IORING_ENTER_SQ_WAKEUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9116) 			wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9117) 		if (flags & IORING_ENTER_SQ_WAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9118) 			ret = io_sqpoll_wait_sq(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9119) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9120) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9121) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9122) 		submitted = to_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9123) 	} else if (to_submit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9124) 		ret = io_uring_add_task_file(ctx, f.file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9125) 		if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9126) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9127) 		mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9128) 		submitted = io_submit_sqes(ctx, to_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9129) 		mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9131) 		if (submitted != to_submit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9132) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9133) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9134) 	if (flags & IORING_ENTER_GETEVENTS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9135) 		min_complete = min(min_complete, ctx->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9137) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9138) 		 * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9139) 		 * space applications don't need to do io completion events
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9140) 		 * polling again, they can rely on io_sq_thread to do polling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9141) 		 * work, which can reduce cpu usage and uring_lock contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9142) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9143) 		if (ctx->flags & IORING_SETUP_IOPOLL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9144) 		    !(ctx->flags & IORING_SETUP_SQPOLL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9145) 			ret = io_iopoll_check(ctx, min_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9146) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9147) 			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9148) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9149) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9151) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9152) 	percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9153) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9154) 	fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9155) 	return submitted ? submitted : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9158) #ifdef CONFIG_PROC_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9159) static int io_uring_show_cred(struct seq_file *m, unsigned int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9160) 		const struct io_identity *iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9162) 	const struct cred *cred = iod->creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9163) 	struct user_namespace *uns = seq_user_ns(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9164) 	struct group_info *gi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9165) 	kernel_cap_t cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9166) 	unsigned __capi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9167) 	int g;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9169) 	seq_printf(m, "%5d\n", id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9170) 	seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9171) 	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9172) 	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9173) 	seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9174) 	seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9175) 	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9176) 	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9177) 	seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9178) 	seq_puts(m, "\n\tGroups:\t");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9179) 	gi = cred->group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9180) 	for (g = 0; g < gi->ngroups; g++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9181) 		seq_put_decimal_ull(m, g ? " " : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9182) 					from_kgid_munged(uns, gi->gid[g]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9183) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9184) 	seq_puts(m, "\n\tCapEff:\t");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9185) 	cap = cred->cap_effective;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9186) 	CAP_FOR_EACH_U32(__capi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9187) 		seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9188) 	seq_putc(m, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9189) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9192) static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9194) 	struct io_sq_data *sq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9195) 	bool has_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9196) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9198) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9199) 	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9200) 	 * since fdinfo case grabs it in the opposite direction of normal use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9201) 	 * cases. If we fail to get the lock, we just don't iterate any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9202) 	 * structures that could be going away outside the io_uring mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9203) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9204) 	has_lock = mutex_trylock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9206) 	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9207) 		sq = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9209) 	seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9210) 	seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9211) 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9212) 	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9213) 		struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9214) 		struct file *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9216) 		table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9217) 		f = table->files[i & IORING_FILE_TABLE_MASK];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9218) 		if (f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9219) 			seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9220) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9221) 			seq_printf(m, "%5u: <none>\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9222) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9223) 	seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9224) 	for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9225) 		struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9227) 		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9228) 						(unsigned int) buf->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9229) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9230) 	if (has_lock && !xa_empty(&ctx->personalities)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9231) 		unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9232) 		const struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9234) 		seq_printf(m, "Personalities:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9235) 		xa_for_each(&ctx->personalities, index, iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9236) 			io_uring_show_cred(m, index, iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9237) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9238) 	seq_printf(m, "PollList:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9239) 	spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9240) 	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9241) 		struct hlist_head *list = &ctx->cancel_hash[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9242) 		struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9244) 		hlist_for_each_entry(req, list, hash_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9245) 			seq_printf(m, "  op=%d, task_works=%d\n", req->opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9246) 					req->task->task_works != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9247) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9248) 	spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9249) 	if (has_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9250) 		mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9253) static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9255) 	struct io_ring_ctx *ctx = f->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9257) 	if (percpu_ref_tryget(&ctx->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9258) 		__io_uring_show_fdinfo(ctx, m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9259) 		percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9260) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9262) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9264) static const struct file_operations io_uring_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9265) 	.release	= io_uring_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9266) 	.flush		= io_uring_flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9267) 	.mmap		= io_uring_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9268) #ifndef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9269) 	.get_unmapped_area = io_uring_nommu_get_unmapped_area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9270) 	.mmap_capabilities = io_uring_nommu_mmap_capabilities,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9271) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9272) 	.poll		= io_uring_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9273) 	.fasync		= io_uring_fasync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9274) #ifdef CONFIG_PROC_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9275) 	.show_fdinfo	= io_uring_show_fdinfo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9276) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9277) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9279) static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9280) 				  struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9281) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9282) 	struct io_rings *rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9283) 	size_t size, sq_array_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9285) 	/* make sure these are sane, as we already accounted them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9286) 	ctx->sq_entries = p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9287) 	ctx->cq_entries = p->cq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9289) 	size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9290) 	if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9291) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9293) 	rings = io_mem_alloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9294) 	if (!rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9295) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9297) 	ctx->rings = rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9298) 	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9299) 	rings->sq_ring_mask = p->sq_entries - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9300) 	rings->cq_ring_mask = p->cq_entries - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9301) 	rings->sq_ring_entries = p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9302) 	rings->cq_ring_entries = p->cq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9303) 	ctx->sq_mask = rings->sq_ring_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9304) 	ctx->cq_mask = rings->cq_ring_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9306) 	size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9307) 	if (size == SIZE_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9308) 		io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9309) 		ctx->rings = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9310) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9311) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9313) 	ctx->sq_sqes = io_mem_alloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9314) 	if (!ctx->sq_sqes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9315) 		io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9316) 		ctx->rings = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9317) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9318) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9320) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9323) static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9325) 	int ret, fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9327) 	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9328) 	if (fd < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9329) 		return fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9331) 	ret = io_uring_add_task_file(ctx, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9332) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9333) 		put_unused_fd(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9334) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9335) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9336) 	fd_install(fd, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9337) 	return fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9340) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9341)  * Allocate an anonymous fd, this is what constitutes the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9342)  * visible backing of an io_uring instance. The application mmaps this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9343)  * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9344)  * we have to tie this fd to a socket for file garbage collection purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9345)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9346) static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9348) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9349) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9350) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9352) 	ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9353) 				&ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9354) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9355) 		return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9356) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9358) 	file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9359) 					O_RDWR | O_CLOEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9360) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9361) 	if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9362) 		sock_release(ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9363) 		ctx->ring_sock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9364) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9365) 		ctx->ring_sock->file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9366) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9367) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9368) 	return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9371) static int io_uring_create(unsigned entries, struct io_uring_params *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9372) 			   struct io_uring_params __user *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9374) 	struct user_struct *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9375) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9376) 	struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9377) 	bool limit_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9378) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9380) 	if (!entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9381) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9382) 	if (entries > IORING_MAX_ENTRIES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9383) 		if (!(p->flags & IORING_SETUP_CLAMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9384) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9385) 		entries = IORING_MAX_ENTRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9386) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9388) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9389) 	 * Use twice as many entries for the CQ ring. It's possible for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9390) 	 * application to drive a higher depth than the size of the SQ ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9391) 	 * since the sqes are only used at submission time. This allows for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9392) 	 * some flexibility in overcommitting a bit. If the application has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9393) 	 * set IORING_SETUP_CQSIZE, it will have passed in the desired number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9394) 	 * of CQ ring entries manually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9395) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9396) 	p->sq_entries = roundup_pow_of_two(entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9397) 	if (p->flags & IORING_SETUP_CQSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9398) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9399) 		 * If IORING_SETUP_CQSIZE is set, we do the same roundup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9400) 		 * to a power-of-two, if it isn't already. We do NOT impose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9401) 		 * any cq vs sq ring sizing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9402) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9403) 		if (!p->cq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9404) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9405) 		if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9406) 			if (!(p->flags & IORING_SETUP_CLAMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9407) 				return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9408) 			p->cq_entries = IORING_MAX_CQ_ENTRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9409) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9410) 		p->cq_entries = roundup_pow_of_two(p->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9411) 		if (p->cq_entries < p->sq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9412) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9413) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9414) 		p->cq_entries = 2 * p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9415) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9417) 	user = get_uid(current_user());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9418) 	limit_mem = !capable(CAP_IPC_LOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9420) 	if (limit_mem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9421) 		ret = __io_account_mem(user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9422) 				ring_pages(p->sq_entries, p->cq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9423) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9424) 			free_uid(user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9425) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9426) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9427) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9429) 	ctx = io_ring_ctx_alloc(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9430) 	if (!ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9431) 		if (limit_mem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9432) 			__io_unaccount_mem(user, ring_pages(p->sq_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9433) 								p->cq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9434) 		free_uid(user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9435) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9436) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9437) 	ctx->compat = in_compat_syscall();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9438) 	ctx->user = user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9439) 	ctx->creds = get_current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9440) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9441) 	ctx->loginuid = current->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9442) 	ctx->sessionid = current->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9443) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9444) 	ctx->sqo_task = get_task_struct(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9446) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9447) 	 * This is just grabbed for accounting purposes. When a process exits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9448) 	 * the mm is exited and dropped before the files, hence we need to hang
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9449) 	 * on to this mm purely for the purposes of being able to unaccount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9450) 	 * memory (locked/pinned vm). It's not used for anything else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9451) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9452) 	mmgrab(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9453) 	ctx->mm_account = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9455) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9456) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9457) 	 * The sq thread will belong to the original cgroup it was inited in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9458) 	 * If the cgroup goes offline (e.g. disabling the io controller), then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9459) 	 * issued bios will be associated with the closest cgroup later in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9460) 	 * block layer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9461) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9462) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9463) 	ctx->sqo_blkcg_css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9464) 	ret = css_tryget_online(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9465) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9466) 	if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9467) 		/* don't init against a dying cgroup, have the user try again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9468) 		ctx->sqo_blkcg_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9469) 		ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9470) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9471) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9472) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9474) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9475) 	 * Account memory _before_ installing the file descriptor. Once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9476) 	 * the descriptor is installed, it can get closed at any time. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9477) 	 * do this before hitting the general error path, as ring freeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9478) 	 * will un-account as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9479) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9480) 	io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9481) 		       ACCT_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9482) 	ctx->limit_mem = limit_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9484) 	ret = io_allocate_scq_urings(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9485) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9486) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9488) 	ret = io_sq_offload_create(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9489) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9490) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9492) 	if (!(p->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9493) 		io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9495) 	memset(&p->sq_off, 0, sizeof(p->sq_off));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9496) 	p->sq_off.head = offsetof(struct io_rings, sq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9497) 	p->sq_off.tail = offsetof(struct io_rings, sq.tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9498) 	p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9499) 	p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9500) 	p->sq_off.flags = offsetof(struct io_rings, sq_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9501) 	p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9502) 	p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9504) 	memset(&p->cq_off, 0, sizeof(p->cq_off));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9505) 	p->cq_off.head = offsetof(struct io_rings, cq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9506) 	p->cq_off.tail = offsetof(struct io_rings, cq.tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9507) 	p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9508) 	p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9509) 	p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9510) 	p->cq_off.cqes = offsetof(struct io_rings, cqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9511) 	p->cq_off.flags = offsetof(struct io_rings, cq_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9513) 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9514) 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9515) 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9516) 			IORING_FEAT_POLL_32BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9517) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9518) 	if (copy_to_user(params, p, sizeof(*p))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9519) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9520) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9521) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9523) 	file = io_uring_get_file(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9524) 	if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9525) 		ret = PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9526) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9529) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9530) 	 * Install ring fd as the very last thing, so we don't risk someone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9531) 	 * having closed it before we finish setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9532) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9533) 	ret = io_uring_install_fd(ctx, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9534) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9535) 		io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9536) 		/* fput will clean it up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9537) 		fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9538) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9539) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9541) 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9542) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9543) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9544) 	io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9545) 	io_ring_ctx_wait_and_kill(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9546) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9549) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9550)  * Sets up an aio uring context, and returns the fd. Applications asks for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9551)  * ring size, we return the actual sq/cq ring sizes (among other things) in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9552)  * params structure passed in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9553)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9554) static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9556) 	struct io_uring_params p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9557) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9558) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9559) 	if (copy_from_user(&p, params, sizeof(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9560) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9561) 	for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9562) 		if (p.resv[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9563) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9564) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9566) 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9567) 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9568) 			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9569) 			IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9570) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9572) 	return  io_uring_create(entries, &p, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9575) SYSCALL_DEFINE2(io_uring_setup, u32, entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9576) 		struct io_uring_params __user *, params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9578) 	return io_uring_setup(entries, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9580) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9581) static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9582) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9583) 	struct io_uring_probe *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9584) 	size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9585) 	int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9587) 	size = struct_size(p, ops, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9588) 	if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9589) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9590) 	p = kzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9591) 	if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9592) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9594) 	ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9595) 	if (copy_from_user(p, arg, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9596) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9597) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9598) 	if (memchr_inv(p, 0, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9599) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9601) 	p->last_op = IORING_OP_LAST - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9602) 	if (nr_args > IORING_OP_LAST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9603) 		nr_args = IORING_OP_LAST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9605) 	for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9606) 		p->ops[i].op = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9607) 		if (!io_op_defs[i].not_supported)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9608) 			p->ops[i].flags = IO_URING_OP_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9609) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9610) 	p->ops_len = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9612) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9613) 	if (copy_to_user(arg, p, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9614) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9615) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9616) 	kfree(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9617) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9620) static int io_register_personality(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9622) 	struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9623) 	u32 id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9624) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9626) 	iod = kmalloc(sizeof(*iod), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9627) 	if (unlikely(!iod))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9628) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9630) 	io_init_identity(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9631) 	iod->creds = get_current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9633) 	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)iod,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9634) 			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9635) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9636) 		put_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9637) 		kfree(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9638) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9639) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9640) 	return id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9643) static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9644) 				    unsigned int nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9646) 	struct io_uring_restriction *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9647) 	size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9648) 	int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9650) 	/* Restrictions allowed only if rings started disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9651) 	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9652) 		return -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9654) 	/* We allow only a single restrictions registration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9655) 	if (ctx->restrictions.registered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9656) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9658) 	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9659) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9661) 	size = array_size(nr_args, sizeof(*res));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9662) 	if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9663) 		return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9665) 	res = memdup_user(arg, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9666) 	if (IS_ERR(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9667) 		return PTR_ERR(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9669) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9671) 	for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9672) 		switch (res[i].opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9673) 		case IORING_RESTRICTION_REGISTER_OP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9674) 			if (res[i].register_op >= IORING_REGISTER_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9675) 				ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9676) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9677) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9679) 			__set_bit(res[i].register_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9680) 				  ctx->restrictions.register_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9681) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9682) 		case IORING_RESTRICTION_SQE_OP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9683) 			if (res[i].sqe_op >= IORING_OP_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9684) 				ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9685) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9686) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9688) 			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9689) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9690) 		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9691) 			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9692) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9693) 		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9694) 			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9695) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9696) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9697) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9698) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9699) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9700) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9702) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9703) 	/* Reset all restrictions if an error happened */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9704) 	if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9705) 		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9706) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9707) 		ctx->restrictions.registered = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9709) 	kfree(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9710) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9712) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9713) static int io_register_enable_rings(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9715) 	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9716) 		return -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9718) 	if (ctx->restrictions.registered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9719) 		ctx->restricted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9721) 	io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9722) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9725) static bool io_register_op_must_quiesce(int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9727) 	switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9728) 	case IORING_UNREGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9729) 	case IORING_REGISTER_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9730) 	case IORING_REGISTER_PROBE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9731) 	case IORING_REGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9732) 	case IORING_UNREGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9733) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9734) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9735) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9736) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9739) static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9740) 			       void __user *arg, unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9741) 	__releases(ctx->uring_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9742) 	__acquires(ctx->uring_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9744) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9746) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9747) 	 * We're inside the ring mutex, if the ref is already dying, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9748) 	 * someone else killed the ctx or is already going through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9749) 	 * io_uring_register().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9750) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9751) 	if (percpu_ref_is_dying(&ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9752) 		return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9754) 	if (io_register_op_must_quiesce(opcode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9755) 		percpu_ref_kill(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9757) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9758) 		 * Drop uring mutex before waiting for references to exit. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9759) 		 * another thread is currently inside io_uring_enter() it might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9760) 		 * need to grab the uring_lock to make progress. If we hold it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9761) 		 * here across the drain wait, then we can deadlock. It's safe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9762) 		 * to drop the mutex here, since no new references will come in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9763) 		 * after we've killed the percpu ref.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9764) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9765) 		mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9766) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9767) 			ret = wait_for_completion_interruptible(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9768) 			if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9769) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9770) 			ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9771) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9772) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9773) 		} while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9774) 		mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9776) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9777) 			io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9778) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9779) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9780) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9781) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9782) 	if (ctx->restricted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9783) 		if (opcode >= IORING_REGISTER_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9784) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9785) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9786) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9788) 		if (!test_bit(opcode, ctx->restrictions.register_op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9789) 			ret = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9790) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9791) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9792) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9794) 	switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9795) 	case IORING_REGISTER_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9796) 		ret = io_sqe_buffer_register(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9797) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9798) 	case IORING_UNREGISTER_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9799) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9800) 		if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9801) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9802) 		ret = io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9803) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9804) 	case IORING_REGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9805) 		ret = io_sqe_files_register(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9806) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9807) 	case IORING_UNREGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9808) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9809) 		if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9810) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9811) 		ret = io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9812) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9813) 	case IORING_REGISTER_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9814) 		ret = io_sqe_files_update(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9815) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9816) 	case IORING_REGISTER_EVENTFD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9817) 	case IORING_REGISTER_EVENTFD_ASYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9818) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9819) 		if (nr_args != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9820) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9821) 		ret = io_eventfd_register(ctx, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9822) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9823) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9824) 		if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9825) 			ctx->eventfd_async = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9826) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9827) 			ctx->eventfd_async = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9828) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9829) 	case IORING_UNREGISTER_EVENTFD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9830) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9831) 		if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9832) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9833) 		ret = io_eventfd_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9834) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9835) 	case IORING_REGISTER_PROBE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9836) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9837) 		if (!arg || nr_args > 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9838) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9839) 		ret = io_probe(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9840) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9841) 	case IORING_REGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9842) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9843) 		if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9844) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9845) 		ret = io_register_personality(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9846) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9847) 	case IORING_UNREGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9848) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9849) 		if (arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9850) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9851) 		ret = io_unregister_personality(ctx, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9852) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9853) 	case IORING_REGISTER_ENABLE_RINGS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9854) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9855) 		if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9856) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9857) 		ret = io_register_enable_rings(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9858) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9859) 	case IORING_REGISTER_RESTRICTIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9860) 		ret = io_register_restrictions(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9861) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9862) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9863) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9864) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9865) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9867) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9868) 	if (io_register_op_must_quiesce(opcode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9869) 		/* bring the ctx back to life */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9870) 		percpu_ref_reinit(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9871) 		reinit_completion(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9872) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9873) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9876) SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9877) 		void __user *, arg, unsigned int, nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9879) 	struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9880) 	long ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9881) 	struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9883) 	f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9884) 	if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9885) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9887) 	ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9888) 	if (f.file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9889) 		goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9890) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9891) 	ctx = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9893) 	mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9894) 	ret = __io_uring_register(ctx, opcode, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9895) 	mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9896) 	trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9897) 							ctx->cq_ev_fd != NULL, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9898) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9899) 	fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9900) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9903) static int __init io_uring_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9905) #define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9906) 	BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9907) 	BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9908) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9910) #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9911) 	__BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9912) 	BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9913) 	BUILD_BUG_SQE_ELEM(0,  __u8,   opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9914) 	BUILD_BUG_SQE_ELEM(1,  __u8,   flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9915) 	BUILD_BUG_SQE_ELEM(2,  __u16,  ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9916) 	BUILD_BUG_SQE_ELEM(4,  __s32,  fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9917) 	BUILD_BUG_SQE_ELEM(8,  __u64,  off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9918) 	BUILD_BUG_SQE_ELEM(8,  __u64,  addr2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9919) 	BUILD_BUG_SQE_ELEM(16, __u64,  addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9920) 	BUILD_BUG_SQE_ELEM(16, __u64,  splice_off_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9921) 	BUILD_BUG_SQE_ELEM(24, __u32,  len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9922) 	BUILD_BUG_SQE_ELEM(28,     __kernel_rwf_t, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9923) 	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9924) 	BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9925) 	BUILD_BUG_SQE_ELEM(28, __u32,  fsync_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9926) 	BUILD_BUG_SQE_ELEM(28, /* compat */ __u16,  poll_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9927) 	BUILD_BUG_SQE_ELEM(28, __u32,  poll32_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9928) 	BUILD_BUG_SQE_ELEM(28, __u32,  sync_range_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9929) 	BUILD_BUG_SQE_ELEM(28, __u32,  msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9930) 	BUILD_BUG_SQE_ELEM(28, __u32,  timeout_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9931) 	BUILD_BUG_SQE_ELEM(28, __u32,  accept_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9932) 	BUILD_BUG_SQE_ELEM(28, __u32,  cancel_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9933) 	BUILD_BUG_SQE_ELEM(28, __u32,  open_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9934) 	BUILD_BUG_SQE_ELEM(28, __u32,  statx_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9935) 	BUILD_BUG_SQE_ELEM(28, __u32,  fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9936) 	BUILD_BUG_SQE_ELEM(28, __u32,  splice_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9937) 	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9938) 	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9939) 	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9940) 	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9942) 	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9943) 	BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9944) 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9945) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9946) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9947) __initcall(io_uring_init);