// SPDX-License-Identifier: GPL-2.0
/*
 * Shared application/kernel submission and completion ring pairs, for
 * supporting fast/efficient IO.
 *
 * A note on the read/write ordering memory barriers that are matched between
 * the application and kernel side.
 *
 * After the application reads the CQ ring tail, it must use an
 * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
 * before writing the tail (using smp_load_acquire to read the tail will
 * do). It also needs a smp_mb() before updating CQ head (ordering the
 * entry load(s) with the head store), pairing with an implicit barrier
 * through a control-dependency in io_get_cqring (smp_store_release to
 * store head will do). Failure to do so could lead to reading invalid
 * CQ entries.
 *
 * Likewise, the application must use an appropriate smp_wmb() before
 * writing the SQ tail (ordering SQ entry stores with the tail store),
 * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
 * to store the tail will do). And it needs a barrier ordering the SQ
 * head load before writing new SQ entries (smp_load_acquire to read
 * head will do).
 *
 * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
 * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
 * updating the SQ tail; a full memory barrier smp_mb() is needed
 * between.
 *
 * Also see the examples in the liburing library:
 *
 *	git://git.kernel.dk/liburing
 *
 * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
 * from data shared between the kernel and application. This is done both
 * for ordering purposes and to ensure that once a value is loaded from
 * data that the application could potentially modify, it remains stable.
 *
 * Copyright (C) 2018-2019 Jens Axboe
 * Copyright (c) 2018-2019 Christoph Hellwig
 */
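/*
 * Purely illustrative sketch of the pairing above, seen from the
 * application side; it is not part of this file and assumes liburing-style
 * helpers (io_uring_smp_load_acquire()/io_uring_smp_store_release()) and
 * ring pointers (khead, ktail, kring_mask, cqes) as liburing exposes them:
 *
 *	unsigned head = *cq->khead;
 *	// acquire pairs with the kernel's release store of the CQ tail
 *	unsigned tail = io_uring_smp_load_acquire(cq->ktail);
 *	while (head != tail) {
 *		struct io_uring_cqe *cqe = &cq->cqes[head & *cq->kring_mask];
 *		handle_cqe(cqe);	// application-defined consumer
 *		head++;
 *	}
 *	// release orders the CQE loads before publishing the new head
 *	io_uring_smp_store_release(cq->khead, head);
 *
 * Submission is symmetric: fill the SQE slots first, then release-store the
 * new SQ tail so the kernel's acquire load of the tail observes those stores.
 */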
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/bits.h>

#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/bvec.h>
#include <linux/net.h>
#include <net/sock.h>
#include <net/af_unix.h>
#include <net/scm.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/hugetlb.h>
#include <linux/highmem.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
#include <linux/splice.h>
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "internal.h"
#include "io-wq.h"

#define IORING_MAX_ENTRIES	32768
#define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)

/*
 * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
 */
#define IORING_FILE_TABLE_SHIFT	9
#define IORING_MAX_FILES_TABLE	(1U << IORING_FILE_TABLE_SHIFT)
#define IORING_FILE_TABLE_MASK	(IORING_MAX_FILES_TABLE - 1)
#define IORING_MAX_FIXED_FILES	(64 * IORING_MAX_FILES_TABLE)
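/*
 * Illustrative sketch only (the file_data variable is hypothetical, not
 * quoted from this file): the defines above describe a two-level table, so
 * a fixed file index i splits into a table index and a slot index:
 *
 *	struct fixed_file_table *table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
 *	struct file *file = table->files[i & IORING_FILE_TABLE_MASK];
 */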
#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

struct io_uring {
	u32 head ____cacheline_aligned_in_smp;
	u32 tail ____cacheline_aligned_in_smp;
};

/*
 * This data is shared with the application through the mmap at offsets
 * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING.
 *
 * The offsets to the member fields are published through struct
 * io_sqring_offsets when calling io_uring_setup.
 */
struct io_rings {
	/*
	 * Head and tail offsets into the ring; the offsets need to be
	 * masked to get valid indices.
	 *
	 * The kernel controls head of the sq ring and the tail of the cq ring,
	 * and the application controls tail of the sq ring and the head of the
	 * cq ring.
	 */
	struct io_uring		sq, cq;
	/*
	 * Bitmasks to apply to head and tail offsets (constant, equals
	 * ring_entries - 1)
	 */
	u32			sq_ring_mask, cq_ring_mask;
	/* Ring sizes (constant, power of 2) */
	u32			sq_ring_entries, cq_ring_entries;
	/*
	 * Number of invalid entries dropped by the kernel due to
	 * invalid index stored in array
	 *
	 * Written by the kernel, shouldn't be modified by the
	 * application (i.e. get number of "new events" by comparing to
	 * cached value).
	 *
	 * After a new SQ head value was read by the application this
	 * counter includes all submissions that were dropped reaching
	 * the new SQ head (and possibly more).
	 */
	u32			sq_dropped;
	/*
	 * Runtime SQ flags
	 *
	 * Written by the kernel, shouldn't be modified by the
	 * application.
	 *
	 * The application needs a full memory barrier before checking
	 * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
	 */
	u32			sq_flags;
	/*
	 * Runtime CQ flags
	 *
	 * Written by the application, shouldn't be modified by the
	 * kernel.
	 */
	u32			cq_flags;
	/*
	 * Number of completion events lost because the queue was full;
	 * this should be avoided by the application by making sure
	 * there are not more requests pending than there is space in
	 * the completion queue.
	 *
	 * Written by the kernel, shouldn't be modified by the
	 * application (i.e. get number of "new events" by comparing to
	 * cached value).
	 *
	 * As completion events come in out of order this counter is not
	 * ordered with any other data.
	 */
	u32			cq_overflow;
	/*
	 * Ring buffer of completion events.
	 *
	 * The kernel writes completion events fresh every time they are
	 * produced, so the application is allowed to modify pending
	 * entries.
	 */
	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
};
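/*
 * Illustrative note (a sketch, not quoted from the original source): head
 * and tail are free-running u32 counters, so unsigned subtraction yields the
 * number of queued entries even across wraparound, and the masks above turn
 * a counter into an array index, e.g.
 *
 *	unsigned int cq_ready = READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
 *	struct io_uring_cqe *cqe = &rings->cqes[rings->cq.head & rings->cq_ring_mask];
 */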

struct io_mapped_ubuf {
	u64			ubuf;
	size_t			len;
	struct bio_vec		*bvec;
	unsigned int		nr_bvecs;
	unsigned long		acct_pages;
};

struct fixed_file_table {
	struct file		**files;
};

struct fixed_file_ref_node {
	struct percpu_ref		refs;
	struct list_head		node;
	struct list_head		file_list;
	struct fixed_file_data		*file_data;
	struct llist_node		llist;
	bool				done;
};

struct fixed_file_data {
	struct fixed_file_table		*table;
	struct io_ring_ctx		*ctx;

	struct fixed_file_ref_node	*node;
	struct percpu_ref		refs;
	struct completion		done;
	struct list_head		ref_list;
	spinlock_t			lock;
};

struct io_buffer {
	struct list_head	list;
	__u64			addr;
	__u32			len;
	__u16			bid;
};

struct io_restriction {
	DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
	DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
	u8 sqe_flags_allowed;
	u8 sqe_flags_required;
	bool registered;
};

struct io_sq_data {
	refcount_t		refs;
	struct mutex		lock;

	/* ctx's that are using this sqd */
	struct list_head	ctx_list;
	struct list_head	ctx_new_list;
	struct mutex		ctx_lock;

	struct task_struct	*thread;
	struct wait_queue_head	wait;
};

struct io_ring_ctx {
	struct {
		struct percpu_ref	refs;
	} ____cacheline_aligned_in_smp;

	struct {
		unsigned int		flags;
		unsigned int		compat: 1;
		unsigned int		limit_mem: 1;
		unsigned int		cq_overflow_flushed: 1;
		unsigned int		drain_next: 1;
		unsigned int		eventfd_async: 1;
		unsigned int		restricted: 1;
		unsigned int		sqo_dead: 1;

		/*
		 * Ring buffer of indices into array of io_uring_sqe, which is
		 * mmapped by the application using the IORING_OFF_SQES offset.
		 *
		 * This indirection could e.g. be used to assign fixed
		 * io_uring_sqe entries to operations and only submit them to
		 * the queue when needed.
		 *
		 * The kernel modifies neither the indices array nor the entries
		 * array.
		 */
		u32			*sq_array;
		unsigned		cached_sq_head;
		unsigned		sq_entries;
		unsigned		sq_mask;
		unsigned		sq_thread_idle;
		unsigned		cached_sq_dropped;
		unsigned		cached_cq_overflow;
		unsigned long		sq_check_overflow;

		struct list_head	defer_list;
		struct list_head	timeout_list;
		struct list_head	cq_overflow_list;

		struct io_uring_sqe	*sq_sqes;
	} ____cacheline_aligned_in_smp;

	struct io_rings	*rings;

	/* IO offload */
	struct io_wq		*io_wq;

	/*
	 * For SQPOLL usage - we hold a reference to the parent task, so we
	 * have access to the ->files
	 */
	struct task_struct	*sqo_task;

	/* Only used for accounting purposes */
	struct mm_struct	*mm_account;

#ifdef CONFIG_BLK_CGROUP
	struct cgroup_subsys_state	*sqo_blkcg_css;
#endif

	struct io_sq_data	*sq_data;	/* if using sq thread polling */

	struct wait_queue_head	sqo_sq_wait;
	struct wait_queue_entry	sqo_wait_entry;
	struct list_head	sqd_list;

	/*
	 * If used, fixed file set. Writers must ensure that ->refs is dead,
	 * readers must ensure that ->refs is alive as long as the file* is
	 * used. Only updated through io_uring_register(2).
	 */
	struct fixed_file_data	*file_data;
	unsigned		nr_user_files;

	/* if used, fixed mapped user buffers */
	unsigned		nr_user_bufs;
	struct io_mapped_ubuf	*user_bufs;

	struct user_struct	*user;

	const struct cred	*creds;

#ifdef CONFIG_AUDIT
	kuid_t			loginuid;
	unsigned int		sessionid;
#endif

	struct completion	ref_comp;
	struct completion	sq_thread_comp;

	/* if all else fails... */
	struct io_kiocb		*fallback_req;

#if defined(CONFIG_UNIX)
	struct socket		*ring_sock;
#endif

	struct xarray		io_buffers;

	struct xarray		personalities;
	u32			pers_next;

	struct {
		unsigned		cached_cq_tail;
		unsigned		cq_entries;
		unsigned		cq_mask;
		atomic_t		cq_timeouts;
		unsigned		cq_last_tm_flush;
		unsigned long		cq_check_overflow;
		struct wait_queue_head	cq_wait;
		struct fasync_struct	*cq_fasync;
		struct eventfd_ctx	*cq_ev_fd;
	} ____cacheline_aligned_in_smp;

	struct {
		struct mutex		uring_lock;
		wait_queue_head_t	wait;
	} ____cacheline_aligned_in_smp;

	struct {
		spinlock_t		completion_lock;

		/*
		 * ->iopoll_list is protected by the ctx->uring_lock for
		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
		 * For SQPOLL, only the single threaded io_sq_thread() will
		 * manipulate the list, hence no extra locking is needed there.
		 */
		struct list_head	iopoll_list;
		struct hlist_head	*cancel_hash;
		unsigned		cancel_hash_bits;
		bool			poll_multi_file;

		spinlock_t		inflight_lock;
		struct list_head	inflight_list;
	} ____cacheline_aligned_in_smp;

	struct delayed_work		file_put_work;
	struct llist_head		file_put_llist;

	struct work_struct		exit_work;
	struct io_restriction		restrictions;
};

/*
 * First field must be the file pointer in all the
 * iocb unions! See also 'struct kiocb' in <linux/fs.h>
 */
struct io_poll_iocb {
	struct file			*file;
	union {
		struct wait_queue_head	*head;
		u64			addr;
	};
	__poll_t			events;
	bool				done;
	bool				canceled;
	struct wait_queue_entry		wait;
};

struct io_close {
	struct file			*file;
	struct file			*put_file;
	int				fd;
};

struct io_timeout_data {
	struct io_kiocb			*req;
	struct hrtimer			timer;
	struct timespec64		ts;
	enum hrtimer_mode		mode;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	unsigned long			nofile;
};

struct io_sync {
	struct file			*file;
	loff_t				len;
	loff_t				off;
	int				flags;
	int				mode;
};

struct io_cancel {
	struct file			*file;
	u64				addr;
};

struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	struct list_head		list;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;
};

struct io_rw {
	/* NOTE: kiocb has the file as the first member, so don't do it here */
	struct kiocb			kiocb;
	u64				addr;
	u64				len;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct user_msghdr __user *umsg;
		void __user		*buf;
	};
	int				msg_flags;
	int				bgid;
	size_t				len;
	struct io_buffer		*kbuf;
};

struct io_open {
	struct file			*file;
	int				dfd;
	bool				ignore_nonblock;
	struct filename			*filename;
	struct open_how			how;
	unsigned long			nofile;
};

struct io_files_update {
	struct file			*file;
	u64				arg;
	u32				nr_args;
	u32				offset;
};

struct io_fadvise {
	struct file			*file;
	u64				offset;
	u32				len;
	u32				advice;
};

struct io_madvise {
	struct file			*file;
	u64				addr;
	u32				len;
	u32				advice;
};

struct io_epoll {
	struct file			*file;
	int				epfd;
	int				op;
	int				fd;
	struct epoll_event		event;
};

struct io_splice {
	struct file			*file_out;
	struct file			*file_in;
	loff_t				off_out;
	loff_t				off_in;
	u64				len;
	unsigned int			flags;
};

struct io_provide_buf {
	struct file			*file;
	__u64				addr;
	__u32				len;
	__u32				bgid;
	__u16				nbufs;
	__u16				bid;
};

struct io_statx {
	struct file			*file;
	int				dfd;
	unsigned int			mask;
	unsigned int			flags;
	const char __user		*filename;
	struct statx __user		*buffer;
};

struct io_completion {
	struct file			*file;
	struct list_head		list;
	u32				cflags;
};

struct io_async_connect {
	struct sockaddr_storage		address;
};

struct io_async_msghdr {
	struct iovec			fast_iov[UIO_FASTIOV];
	struct iovec			*iov;
	struct sockaddr __user		*uaddr;
	struct msghdr			msg;
	struct sockaddr_storage		addr;
};

struct io_async_rw {
	struct iovec			fast_iov[UIO_FASTIOV];
	const struct iovec		*free_iovec;
	struct iov_iter			iter;
	size_t				bytes_done;
	struct wait_page_queue		wpq;
};

enum {
	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
	REQ_F_LINK_BIT		= IOSQE_IO_LINK_BIT,
	REQ_F_HARDLINK_BIT	= IOSQE_IO_HARDLINK_BIT,
	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,

	REQ_F_LINK_HEAD_BIT,
	REQ_F_FAIL_LINK_BIT,
	REQ_F_INFLIGHT_BIT,
	REQ_F_CUR_POS_BIT,
	REQ_F_NOWAIT_BIT,
	REQ_F_LINK_TIMEOUT_BIT,
	REQ_F_ISREG_BIT,
	REQ_F_NEED_CLEANUP_BIT,
	REQ_F_POLLED_BIT,
	REQ_F_BUFFER_SELECTED_BIT,
	REQ_F_NO_FILE_TABLE_BIT,
	REQ_F_WORK_INITIALIZED_BIT,
	REQ_F_LTIMEOUT_ACTIVE_BIT,

	/* not a real bit, just to check we're not overflowing the space */
	__REQ_F_LAST_BIT,
};

enum {
	/* ctx owns file */
	REQ_F_FIXED_FILE	= BIT(REQ_F_FIXED_FILE_BIT),
	/* drain existing IO first */
	REQ_F_IO_DRAIN		= BIT(REQ_F_IO_DRAIN_BIT),
	/* linked sqes */
	REQ_F_LINK		= BIT(REQ_F_LINK_BIT),
	/* doesn't sever on completion < 0 */
	REQ_F_HARDLINK		= BIT(REQ_F_HARDLINK_BIT),
	/* IOSQE_ASYNC */
	REQ_F_FORCE_ASYNC	= BIT(REQ_F_FORCE_ASYNC_BIT),
	/* IOSQE_BUFFER_SELECT */
	REQ_F_BUFFER_SELECT	= BIT(REQ_F_BUFFER_SELECT_BIT),

	/* head of a link */
	REQ_F_LINK_HEAD		= BIT(REQ_F_LINK_HEAD_BIT),
	/* fail rest of links */
	REQ_F_FAIL_LINK		= BIT(REQ_F_FAIL_LINK_BIT),
	/* on inflight list */
	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
	/* read/write uses file position */
	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
	/* must not punt to workers */
	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
	/* has or had linked timeout */
	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
	/* regular file */
	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
	/* needs cleanup */
	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
	/* already went through poll handler */
	REQ_F_POLLED		= BIT(REQ_F_POLLED_BIT),
	/* buffer already selected */
	REQ_F_BUFFER_SELECTED	= BIT(REQ_F_BUFFER_SELECTED_BIT),
	/* doesn't need file table for this request */
	REQ_F_NO_FILE_TABLE	= BIT(REQ_F_NO_FILE_TABLE_BIT),
	/* io_wq_work is initialized */
	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
	/* linked timeout is active, i.e. prepared by link's head */
	REQ_F_LTIMEOUT_ACTIVE	= BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
};

struct async_poll {
	struct io_poll_iocb	poll;
	struct io_poll_iocb	*double_poll;
};

/*
 * NOTE! Each of the iocb union members has the file pointer
 * as the first entry in their struct definition. So you can
 * access the file pointer through any of the sub-structs,
 * or directly as just 'file' in this struct.
 */
struct io_kiocb {
	union {
		struct file		*file;
		struct io_rw		rw;
		struct io_poll_iocb	poll;
		struct io_accept	accept;
		struct io_sync		sync;
		struct io_cancel	cancel;
		struct io_timeout	timeout;
		struct io_timeout_rem	timeout_rem;
		struct io_connect	connect;
		struct io_sr_msg	sr_msg;
		struct io_open		open;
		struct io_close		close;
		struct io_files_update	files_update;
		struct io_fadvise	fadvise;
		struct io_madvise	madvise;
		struct io_epoll		epoll;
		struct io_splice	splice;
		struct io_provide_buf	pbuf;
		struct io_statx		statx;
		/* use only after cleaning per-op data, see io_clean_op() */
		struct io_completion	compl;
	};

	/* opcode allocated if it needs to store data for async defer */
	void				*async_data;
	u8				opcode;
	/* polled IO has completed */
	u8				iopoll_completed;

	u16				buf_index;
	u32				result;

	struct io_ring_ctx		*ctx;
	unsigned int			flags;
	refcount_t			refs;
	struct task_struct		*task;
	u64				user_data;

	struct list_head		link_list;

	/*
	 * 1. used with ctx->iopoll_list with reads/writes
	 * 2. to track reqs with ->files (see io_op_def::file_table)
	 */
	struct list_head		inflight_entry;

	struct percpu_ref		*fixed_file_refs;
	struct callback_head		task_work;
	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
	struct hlist_node		hash_node;
	struct async_poll		*apoll;
	struct io_wq_work		work;
};

struct io_defer_entry {
	struct list_head	list;
	struct io_kiocb		*req;
	u32			seq;
};

#define IO_IOPOLL_BATCH			8

struct io_comp_state {
	unsigned int		nr;
	struct list_head	list;
	struct io_ring_ctx	*ctx;
};

struct io_submit_state {
	struct blk_plug		plug;

	/*
	 * io_kiocb alloc cache
	 */
	void			*reqs[IO_IOPOLL_BATCH];
	unsigned int		free_reqs;

	/*
	 * Batch completion logic
	 */
	struct io_comp_state	comp;

	/*
	 * File reference cache
	 */
	struct file		*file;
	unsigned int		fd;
	unsigned int		has_refs;
	unsigned int		ios_left;
};

struct io_op_def {
	/* needs req->file assigned */
	unsigned		needs_file : 1;
	/* don't fail if file grab fails */
	unsigned		needs_file_no_error : 1;
	/* hash wq insertion if file is a regular file */
	unsigned		hash_reg_file : 1;
	/* unbound wq insertion if file is a non-regular file */
	unsigned		unbound_nonreg_file : 1;
	/* opcode is not supported by this kernel */
	unsigned		not_supported : 1;
	/* set if opcode supports polled "wait" */
	unsigned		pollin : 1;
	unsigned		pollout : 1;
	/* op supports buffer selection */
	unsigned		buffer_select : 1;
	/* must always have async data allocated */
	unsigned		needs_async_data : 1;
	/* size of async data needed, if any */
	unsigned short		async_size;
	unsigned		work_flags;
};

static const struct io_op_def io_op_defs[] = {
	[IORING_OP_NOP] = {},
	[IORING_OP_READV] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.buffer_select		= 1,
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_async_rw),
		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
	},
	[IORING_OP_WRITEV] = {
		.needs_file		= 1,
		.hash_reg_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_async_rw),
		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
						IO_WQ_WORK_FSIZE,
	},
	[IORING_OP_FSYNC] = {
		.needs_file		= 1,
		.work_flags		= IO_WQ_WORK_BLKCG,
	},
	[IORING_OP_READ_FIXED] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.async_size		= sizeof(struct io_async_rw),
		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
	},
	[IORING_OP_WRITE_FIXED] = {
		.needs_file		= 1,
		.hash_reg_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.async_size		= sizeof(struct io_async_rw),
		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
						IO_WQ_WORK_MM,
	},
	[IORING_OP_POLL_ADD] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
	},
	[IORING_OP_POLL_REMOVE] = {},
	[IORING_OP_SYNC_FILE_RANGE] = {
		.needs_file		= 1,
		.work_flags		= IO_WQ_WORK_BLKCG,
	},
	[IORING_OP_SENDMSG] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_async_msghdr),
		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
						IO_WQ_WORK_FS,
	},
	[IORING_OP_RECVMSG] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.buffer_select		= 1,
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_async_msghdr),
		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
						IO_WQ_WORK_FS,
	},
	[IORING_OP_TIMEOUT] = {
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_timeout_data),
		.work_flags		= IO_WQ_WORK_MM,
	},
	[IORING_OP_TIMEOUT_REMOVE] = {},
	[IORING_OP_ACCEPT] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollin			= 1,
		.work_flags		= IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
	},
	[IORING_OP_ASYNC_CANCEL] = {},
	[IORING_OP_LINK_TIMEOUT] = {
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_timeout_data),
		.work_flags		= IO_WQ_WORK_MM,
	},
	[IORING_OP_CONNECT] = {
		.needs_file		= 1,
		.unbound_nonreg_file	= 1,
		.pollout		= 1,
		.needs_async_data	= 1,
		.async_size		= sizeof(struct io_async_connect),
		.work_flags		= IO_WQ_WORK_MM,
	},
	[IORING_OP_FALLOCATE] = {
		.needs_file		= 1,
		.work_flags		= IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
	},
	[IORING_OP_OPENAT] = {
		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
						IO_WQ_WORK_FS,
	},
	[IORING_OP_CLOSE] = {
		.needs_file		= 1,
		.needs_file_no_error	= 1,
		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
	},
	[IORING_OP_FILES_UPDATE] = {
		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
	},
	[IORING_OP_STATX] = {
		.work_flags		= IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) [IORING_OP_READ] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) .pollin = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) .buffer_select = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) .async_size = sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) [IORING_OP_WRITE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) .hash_reg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) .pollout = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) .async_size = sizeof(struct io_async_rw),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) IO_WQ_WORK_FSIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) [IORING_OP_FADVISE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) .work_flags = IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) [IORING_OP_MADVISE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) [IORING_OP_SEND] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) .pollout = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) [IORING_OP_RECV] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) .pollin = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) .buffer_select = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) [IORING_OP_OPENAT2] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) [IORING_OP_EPOLL_CTL] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) .work_flags = IO_WQ_WORK_FILES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) [IORING_OP_SPLICE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) .hash_reg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) .work_flags = IO_WQ_WORK_BLKCG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) [IORING_OP_PROVIDE_BUFFERS] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) [IORING_OP_REMOVE_BUFFERS] = {},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) [IORING_OP_TEE] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) .needs_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) .hash_reg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) .unbound_nonreg_file = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) enum io_mem_account {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) ACCT_LOCKED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) ACCT_PINNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) struct io_ring_ctx *ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) struct io_comp_state *cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) static void io_cqring_fill_event(struct io_kiocb *req, long res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) static void io_put_req(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) static void io_put_req_deferred(struct io_kiocb *req, int nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) static void io_double_put_req(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) static void __io_queue_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) static void io_queue_linked_timeout(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) static int __io_sqe_files_update(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) struct io_uring_files_update *ip,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) unsigned nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) static void __io_clean_op(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) static struct file *io_file_get(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) struct io_kiocb *req, int fd, bool fixed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) static void io_file_put_work(struct work_struct *work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) bool needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) const struct iovec *fast_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) struct iov_iter *iter, bool force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) static void io_req_drop_files(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) static void io_req_task_queue(struct io_kiocb *req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) static struct kmem_cache *req_cachep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) static const struct file_operations io_uring_fops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) struct sock *io_uring_get_socket(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) return ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) EXPORT_SYMBOL(io_uring_get_socket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) static inline void io_clean_op(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) __io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static inline bool __io_match_files(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) if (req->file && req->file->f_op == &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) return ((req->flags & REQ_F_WORK_INITIALIZED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) (req->work.flags & IO_WQ_WORK_FILES)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) req->work.identity->files == files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) bool got = percpu_ref_tryget(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) /* already at zero, wait for ->release() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) if (!got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) wait_for_completion(compl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) percpu_ref_resurrect(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) percpu_ref_put(ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) static bool io_match_task(struct io_kiocb *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) if (task && head->task != task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) /* in terms of cancelation, always match if req task is dead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (head->task->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (!files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (__io_match_files(head, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) if (head->flags & REQ_F_LINK_HEAD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) list_for_each_entry(link, &head->link_list, link_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) if (__io_match_files(link, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) static void io_sq_thread_drop_mm(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) kthread_unuse_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) current->mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (current->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) if (current->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) /* Should never happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) task_lock(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) mm = ctx->sqo_task->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (unlikely(!mm || !mmget_not_zero(mm)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) task_unlock(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) kthread_use_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) return __io_sq_thread_acquire_mm(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) struct cgroup_subsys_state **cur_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) /* puts the old one when swapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) if (*cur_css != ctx->sqo_blkcg_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) kthread_associate_blkcg(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) *cur_css = ctx->sqo_blkcg_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) static void io_sq_thread_unassociate_blkcg(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) kthread_associate_blkcg(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) static inline void req_set_fail_links(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) req->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) * None of these are dereferenced; they are simply used to check whether any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) * of them have changed. If we're running under current and they are still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) * the same, we're fine to grab references to them for actual out-of-line use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) */
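/*
 * As an illustration of that pattern (see io_grab_identity() further down):
 * the saved pointers are only compared, e.g. id->creds != current_cred() or
 * id->mm != current->mm, and a reference (get_cred(), mmgrab(), ...) is
 * taken only once the value is confirmed to still match current's.
 */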
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) static void io_init_identity(struct io_identity *id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) id->files = current->files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) id->mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) id->blkcg_css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) id->creds = current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) id->nsproxy = current->nsproxy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) id->fs = current->fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) id->fsize = rlimit(RLIMIT_FSIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) id->loginuid = current->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) id->sessionid = current->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) refcount_set(&id->count, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) static inline void __io_req_init_async(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) memset(&req->work, 0, sizeof(req->work));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) req->flags |= REQ_F_WORK_INITIALIZED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * Note: io_req_init_async() must be called before the first time any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * members of io_wq_work are touched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) */
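/*
 * io_prep_async_work() below follows this rule: it calls io_req_init_async()
 * first, and only then reads req->work.identity and sets bits in
 * req->work.flags.
 */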
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) static inline void io_req_init_async(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (req->flags & REQ_F_WORK_INITIALIZED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) __io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /* Grab a ref if this isn't our static identity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) req->work.identity = tctx->identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) if (tctx->identity != &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) refcount_inc(&req->work.identity->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) static inline bool io_async_submit(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) return ctx->flags & IORING_SETUP_SQPOLL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) static void io_ring_ctx_ref_free(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) complete(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) static inline bool io_is_timeout_noseq(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) return !req->timeout.off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) int hash_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) if (!ctx->fallback_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * Use 5 bits less than the max cq entries; that should give us around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * 32 entries per hash list if totally full and uniformly spread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) */
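	/*
	 * For example, with illustrative numbers: p->cq_entries == 4096 gives
	 * ilog2(4096) == 12, so hash_bits == 7 and the table has 128 buckets;
	 * a completely full CQ then averages 4096 / 128 == 32 entries per list.
	 */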
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) hash_bits = ilog2(p->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) hash_bits -= 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (hash_bits <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) hash_bits = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) ctx->cancel_hash_bits = hash_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (!ctx->cancel_hash)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) __hash_init(ctx->cancel_hash, 1U << hash_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) ctx->flags = p->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) init_waitqueue_head(&ctx->sqo_sq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) INIT_LIST_HEAD(&ctx->sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) init_waitqueue_head(&ctx->cq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) INIT_LIST_HEAD(&ctx->cq_overflow_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) init_completion(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) init_completion(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) mutex_init(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) init_waitqueue_head(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) spin_lock_init(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) INIT_LIST_HEAD(&ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) INIT_LIST_HEAD(&ctx->defer_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) INIT_LIST_HEAD(&ctx->timeout_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) spin_lock_init(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) INIT_LIST_HEAD(&ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) init_llist_head(&ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) return ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) if (ctx->fallback_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) kmem_cache_free(req_cachep, ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) kfree(ctx->cancel_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) static bool req_need_defer(struct io_kiocb *req, u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) return seq != ctx->cached_cq_tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) + READ_ONCE(ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) static void __io_commit_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) /* order cqe stores with ring update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) if (req->work.identity == &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (refcount_dec_and_test(&req->work.identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) kfree(req->work.identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) static void io_req_clean_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) if (!(req->flags & REQ_F_WORK_INITIALIZED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) req->flags &= ~REQ_F_WORK_INITIALIZED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (req->work.flags & IO_WQ_WORK_MM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) mmdrop(req->work.identity->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) req->work.flags &= ~IO_WQ_WORK_MM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) if (req->work.flags & IO_WQ_WORK_BLKCG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) css_put(req->work.identity->blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) req->work.flags &= ~IO_WQ_WORK_BLKCG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) if (req->work.flags & IO_WQ_WORK_CREDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) put_cred(req->work.identity->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) req->work.flags &= ~IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) if (req->work.flags & IO_WQ_WORK_FS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) struct fs_struct *fs = req->work.identity->fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) spin_lock(&req->work.identity->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) if (--fs->users)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) fs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) spin_unlock(&req->work.identity->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) if (fs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) free_fs_struct(fs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) req->work.flags &= ~IO_WQ_WORK_FS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if (req->flags & REQ_F_INFLIGHT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) io_req_drop_files(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) io_put_identity(req->task->io_uring, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) * Create a private copy of io_identity, since some fields don't match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) * the current context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) static bool io_identity_cow(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) const struct cred *creds = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) struct io_identity *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) if (req->work.flags & IO_WQ_WORK_CREDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) creds = req->work.identity->creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) if (unlikely(!id)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) req->work.flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * We can safely just re-init the creds we copied. Either the field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * matches the current one, or we haven't grabbed it yet. The only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * exception is ->creds, through registered personalities, so handle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * that one separately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) io_init_identity(id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) if (creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) id->creds = creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) /* add one for this request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) refcount_inc(&id->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) /* drop tctx and req identity references, if needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) if (tctx->identity != &tctx->__identity &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) refcount_dec_and_test(&tctx->identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) kfree(tctx->identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) if (req->work.identity != &tctx->__identity &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) refcount_dec_and_test(&req->work.identity->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) kfree(req->work.identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) req->work.identity = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) tctx->identity = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) static bool io_grab_identity(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) struct io_identity *id = req->work.identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) if (def->work_flags & IO_WQ_WORK_FSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (id->fsize != rlimit(RLIMIT_FSIZE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) req->work.flags |= IO_WQ_WORK_FSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) (def->work_flags & IO_WQ_WORK_BLKCG)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) if (id->blkcg_css != blkcg_css()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) * This should be rare, either the cgroup is dying or the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) * is moving cgroups. Just punt to root for the handful of ios.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (css_tryget_online(id->blkcg_css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) req->work.flags |= IO_WQ_WORK_BLKCG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) if (id->creds != current_cred())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) get_cred(id->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) req->work.flags |= IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) if (!uid_eq(current->loginuid, id->loginuid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) current->sessionid != id->sessionid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) if (!(req->work.flags & IO_WQ_WORK_FS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) (def->work_flags & IO_WQ_WORK_FS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) if (current->fs != id->fs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) spin_lock(&id->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) if (!id->fs->in_exec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) id->fs->users++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) req->work.flags |= IO_WQ_WORK_FS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) req->work.flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) spin_unlock(&current->fs->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) if (!(req->work.flags & IO_WQ_WORK_FILES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) (def->work_flags & IO_WQ_WORK_FILES) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) !(req->flags & REQ_F_NO_FILE_TABLE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) if (id->files != current->files ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) id->nsproxy != current->nsproxy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) atomic_inc(&id->files->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) get_nsproxy(id->nsproxy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) if (!(req->flags & REQ_F_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) req->flags |= REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) list_add(&req->inflight_entry, &ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) req->work.flags |= IO_WQ_WORK_FILES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) if (!(req->work.flags & IO_WQ_WORK_MM) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) (def->work_flags & IO_WQ_WORK_MM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) if (id->mm != current->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) mmgrab(id->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) req->work.flags |= IO_WQ_WORK_MM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) static void io_prep_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) struct io_identity *id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) id = req->work.identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (req->flags & REQ_F_FORCE_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) req->work.flags |= IO_WQ_WORK_CONCURRENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) io_wq_hash_work(&req->work, file_inode(req->file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) if (def->unbound_nonreg_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) req->work.flags |= IO_WQ_WORK_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) /* if we fail grabbing identity, we must COW, regrab, and retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) if (io_grab_identity(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) if (!io_identity_cow(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) /* can't fail at this point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) if (!io_grab_identity(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) static void io_prep_async_link(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct io_kiocb *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) io_prep_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) list_for_each_entry(cur, &req->link_list, link_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) io_prep_async_work(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) struct io_kiocb *link = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) &req->work, req->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) io_wq_enqueue(ctx->io_wq, &req->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) return link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) static void io_queue_async_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) /* init ->work of the whole link before punting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) io_prep_async_link(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) link = __io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) if (link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) io_queue_linked_timeout(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) static void io_kill_timeout(struct io_kiocb *req, int status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) struct io_timeout_data *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) if (ret != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) if (status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) atomic_set(&req->ctx->cq_timeouts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) atomic_read(&req->ctx->cq_timeouts) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) io_cqring_fill_event(req, status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) * Returns true if we found and killed one or more timeouts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) int canceled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) if (io_match_task(req, tsk, files)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) io_kill_timeout(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) canceled++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) return canceled != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) static void __io_queue_deferred(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) struct io_defer_entry, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if (req_need_defer(de->req, de->seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) list_del_init(&de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) io_req_task_queue(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) } while (!list_empty(&ctx->defer_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) static void io_flush_timeouts(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) if (list_empty(&ctx->timeout_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) u32 events_needed, events_got;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (io_is_timeout_noseq(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) * Since seq can easily wrap around over time, subtract
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) * the last seq at which timeouts were flushed before comparing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) * Assuming not more than 2^31-1 events have happened since,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) * these subtractions won't have wrapped, so we can check if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) * target is in [last_seq, current_seq] by comparing the two.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) */
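		/*
		 * Worked example with illustrative values: if cq_last_tm_flush
		 * is 0xfffffff0 and a timeout's target_seq is 0x10, then
		 * events_needed is 0x20 (32) despite the wrap; with seq ==
		 * 0x05, events_got is 0x15 (21), so the timeout is correctly
		 * left armed.
		 */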
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) events_got = seq - ctx->cq_last_tm_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) if (events_got < events_needed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) io_kill_timeout(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) ctx->cq_last_tm_flush = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) static void io_commit_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) io_flush_timeouts(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) __io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (unlikely(!list_empty(&ctx->defer_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) __io_queue_deferred(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) static inline bool io_sqring_full(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) struct io_rings *r = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == r->sq_ring_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) unsigned tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) tail = ctx->cached_cq_tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * writes to the cq entry need to come after reading head; the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * control dependency is enough as we're using WRITE_ONCE to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * fill the cq entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) if (tail - READ_ONCE(rings->cq.head) == rings->cq_ring_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) ctx->cached_cq_tail++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) return &rings->cqes[tail & ctx->cq_mask];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) if (!ctx->cq_ev_fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) if (!ctx->eventfd_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) return io_wq_current_is_worker();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)
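/*
 * Wake up anyone waiting for completions: pollers on cq_wait (plus
 * fasync), waiters in ctx->wait, the SQPOLL thread if it's sleeping, and
 * the registered eventfd if signalling it is currently allowed.
 */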
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (wq_has_sleeper(&ctx->cq_wait)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) wake_up_interruptible(&ctx->cq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) if (waitqueue_active(&ctx->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) wake_up(&ctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) if (io_should_trigger_evfd(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) eventfd_signal(ctx->cq_ev_fd, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
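/*
 * Once the overflow backlog has been fully drained, clear the overflow
 * bookkeeping bits and drop IORING_SQ_CQ_OVERFLOW from the flags the
 * application sees.
 */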
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) if (list_empty(&ctx->cq_overflow_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) clear_bit(0, &ctx->sq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) clear_bit(0, &ctx->cq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) /* Returns true if there are no backlogged entries after the flush */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) struct io_uring_cqe *cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) if (!force) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) rings->cq_ring_entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) cqe = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) if (!io_match_task(req, tsk, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) cqe = io_get_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) if (!cqe && !force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) list_move(&req->compl.list, &list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) if (cqe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) WRITE_ONCE(cqe->user_data, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) WRITE_ONCE(cqe->res, req->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) WRITE_ONCE(cqe->flags, req->compl.cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) ctx->cached_cq_overflow++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) WRITE_ONCE(ctx->rings->cq_overflow,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) io_cqring_mark_overflow(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) while (!list_empty(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) req = list_first_entry(&list, struct io_kiocb, compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) list_del(&req->compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) return cqe != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) if (test_bit(0, &ctx->cq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) /* iopoll syncs against uring_lock, not completion_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) if (ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) __io_cqring_overflow_flush(ctx, force, tsk, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) static void __io_cqring_fill_event(struct io_kiocb *req, long res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) unsigned int cflags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) struct io_uring_cqe *cqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) trace_io_uring_complete(ctx, req->user_data, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) * If we can't get a CQ entry, userspace has submitted far more than it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) * has reaped and the CQ ring is full. Either stash the completion on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * the overflow backlog or, failing that, bump the ring's overflow count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) cqe = io_get_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) if (likely(cqe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) WRITE_ONCE(cqe->user_data, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) WRITE_ONCE(cqe->res, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) WRITE_ONCE(cqe->flags, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) } else if (ctx->cq_overflow_flushed ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) atomic_read(&req->task->io_uring->in_idle)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * If we're in ring overflow flush mode, or in task cancel mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) * then we cannot store the request for later flushing; we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) * to drop it on the floor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) ctx->cached_cq_overflow++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) if (list_empty(&ctx->cq_overflow_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) set_bit(0, &ctx->sq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) set_bit(0, &ctx->cq_check_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) req->result = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) req->compl.cflags = cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) list_add_tail(&req->compl.list, &ctx->cq_overflow_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) static void io_cqring_fill_event(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) __io_cqring_fill_event(req, res, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) __io_cqring_fill_event(req, res, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) static void io_submit_flush_completions(struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) struct io_ring_ctx *ctx = cs->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) while (!list_empty(&cs->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) list_del(&req->compl.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) __io_cqring_fill_event(req, req->result, req->compl.cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) * io_free_req() doesn't care about completion_lock unless one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) * because of a potential deadlock with req->work.fs->lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) |REQ_F_WORK_INITIALIZED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) cs->nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)
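/*
 * Complete a request. Without a completion state (cs) the CQE is posted
 * immediately; with one, the request is parked on cs->list and flushed in
 * batches (currently every 32 completions) to amortize the locking and
 * wakeup cost.
 */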
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) if (!cs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) io_cqring_add_event(req, res, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) req->result = res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) req->compl.cflags = cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) list_add_tail(&req->compl.list, &cs->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) if (++cs->nr >= 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) io_submit_flush_completions(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) static void io_req_complete(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) __io_req_complete(req, res, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static inline bool io_is_fallback_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) return req == (struct io_kiocb *)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) ((unsigned long) req->ctx->fallback_req & ~1UL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) req = ctx->fallback_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (!test_and_set_bit_lock(0, (unsigned long *) &ctx->fallback_req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
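/*
 * Allocate a request, preferring the per-submit batch in io_submit_state.
 * If the batch is empty it is refilled with a bulk slab allocation, then a
 * single allocation, and as a last resort the pre-allocated
 * ctx->fallback_req is handed out.
 */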
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) if (!state->free_reqs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) size_t sz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * Bulk alloc is all-or-nothing. If we fail to get a batch,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * retry single alloc to be on the safe side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) if (unlikely(ret <= 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if (!state->reqs[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) goto fallback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) state->free_reqs = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) state->free_reqs--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) return state->reqs[state->free_reqs];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) fallback:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) return io_get_fallback_req(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) static inline void io_put_file(struct io_kiocb *req, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) bool fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) percpu_ref_put(req->fixed_file_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) static void io_dismantle_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) io_clean_op(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) kfree(req->async_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) if (req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) io_req_clean_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) static void __io_free_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) io_dismantle_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) percpu_counter_dec(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) put_task_struct(req->task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) if (likely(!io_is_fallback_req(req)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) kmem_cache_free(req_cachep, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) clear_bit_unlock(0, (unsigned long *) &ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) static void io_kill_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) struct io_kiocb *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) bool cancelled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) * This can happen if a linked timeout fired and the link had been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) * set up like: req -> link t-out -> link t-out [-> ...]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) struct io_timeout_data *io = link->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) list_del_init(&link->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) if (ret != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) io_cqring_fill_event(link, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) cancelled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) req->flags &= ~REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) if (cancelled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) io_put_req(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * The list should never be empty when we are called here. But it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * could potentially happen if the chain is messed up, so check to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * be on the safe side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) if (unlikely(list_empty(&req->link_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) list_del_init(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) if (!list_empty(&nxt->link_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) nxt->flags |= REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * Called if REQ_F_LINK_HEAD is set, and we fail the head request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) static void io_fail_links(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) while (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) struct io_kiocb *link = list_first_entry(&req->link_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) struct io_kiocb, link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) list_del_init(&link->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) trace_io_uring_fail_link(req, link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) io_cqring_fill_event(link, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) * It's OK to free under the spinlock as the requests are not linked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) * anymore, but defer REQ_F_WORK_INITIALIZED requests as freeing them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) * here may deadlock on work.fs->lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) if (link->flags & REQ_F_WORK_INITIALIZED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) io_put_req_deferred(link, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) io_double_put_req(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) req->flags &= ~REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (req->flags & REQ_F_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) io_kill_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * If LINK is set, we have dependent requests in this chain. If we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * didn't fail this request, queue the first one up, moving any other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) * dependencies to the next request. In case of failure, fail the rest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) * of the chain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) if (likely(!(req->flags & REQ_F_FAIL_LINK)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) return io_req_link_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) io_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) if (likely(!(req->flags & REQ_F_LINK_HEAD)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) return __io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) struct task_struct *tsk = req->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) enum task_work_notify_mode notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) if (tsk->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) return -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) * SQPOLL kernel thread doesn't need notification, just a wakeup. For
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) * all other cases, use TWA_SIGNAL unconditionally to ensure we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) * processing task_work. There's no reliable way to tell if TWA_RESUME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) * will do the job.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) notify = TWA_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) notify = TWA_SIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) ret = task_work_add(tsk, &req->task_work, notify);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) static void __io_req_task_cancel(struct io_kiocb *req, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) io_cqring_fill_event(req, error);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) io_double_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) static void io_req_task_cancel(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) __io_req_task_cancel(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) static void __io_req_task_submit(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) if (!ctx->sqo_dead && !__io_sq_thread_acquire_mm(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) __io_queue_sqe(req, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) __io_req_task_cancel(req, -EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) if (ctx->flags & IORING_SETUP_SQPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) static void io_req_task_submit(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) __io_req_task_submit(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) static void io_req_task_queue(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) init_task_work(&req->task_work, io_req_task_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) ret = io_req_task_work_add(req, true);
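/*
 * If the task can't accept task_work (e.g. it is exiting), punt the
 * cancellation to the io-wq task instead so the request still gets
 * completed.
 */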
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) init_task_work(&req->task_work, io_req_task_cancel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) static void io_queue_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) struct io_kiocb *nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) if (nxt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) io_req_task_queue(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) static void io_free_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) io_queue_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) __io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
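/*
 * Batch state for freeing requests: completed requests are collected here
 * so they can be released with one kmem_cache_free_bulk() call and the
 * per-task inflight accounting can be dropped in bulk as well.
 */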
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) struct req_batch {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) void *reqs[IO_IOPOLL_BATCH];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) int to_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) int task_refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) static inline void io_init_req_batch(struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) rb->to_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) rb->task_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) rb->task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) static void __io_req_free_batch_flush(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) percpu_ref_put_many(&ctx->refs, rb->to_free);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) rb->to_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) struct req_batch *rb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) if (rb->to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) __io_req_free_batch_flush(ctx, rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) if (rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) struct io_uring_task *tctx = rb->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) percpu_counter_sub(&tctx->inflight, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) put_task_struct_many(rb->task, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) rb->task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) if (unlikely(io_is_fallback_req(req))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) io_queue_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) if (req->task != rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (rb->task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) struct io_uring_task *tctx = rb->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) percpu_counter_sub(&tctx->inflight, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) put_task_struct_many(rb->task, rb->task_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) rb->task = req->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) rb->task_refs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) rb->task_refs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) io_dismantle_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) rb->reqs[rb->to_free++] = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) __io_req_free_batch_flush(req->ctx, rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) * Drop a reference to the request. If it was the last reference, return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) * the next request in the chain (if there is one).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) static struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) struct io_kiocb *nxt = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) if (refcount_dec_and_test(&req->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) __io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) static void io_put_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (refcount_dec_and_test(&req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) static void io_put_req_deferred_cb(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) static void io_free_req_deferred(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) init_task_work(&req->task_work, io_put_req_deferred_cb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) ret = io_req_task_work_add(req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) if (refcount_sub_and_test(refs, &req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) io_free_req_deferred(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) static struct io_wq_work *io_steal_work(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) * A ref is owned by io-wq, in whose context we're running. So if that's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) * the last one, it's safe to steal the next work. False negatives are OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) * the work will just be re-punted async in io_put_work().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) if (refcount_read(&req->refs) != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) nxt = io_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) return nxt ? &nxt->work : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) static void io_double_put_req(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) /* drop both submit and complete references */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) if (refcount_sub_and_test(2, &req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) io_free_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289)
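/*
 * Number of CQEs the kernel has posted that the application has not yet
 * consumed, based on our cached tail and the user-visible head.
 */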
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) static unsigned io_cqring_events(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) /* See comment at the top of this file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) /* make sure SQ entry isn't read before tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306)
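/*
 * Release a selected (provided) buffer and encode its ID in the CQE flags
 * so the application can tell which buffer was used. Roughly, a consumer
 * would do something like (a sketch, not taken from this file):
 *
 *	if (cqe->flags & IORING_CQE_F_BUFFER)
 *		bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
 */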
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) unsigned int cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) cflags |= IORING_CQE_F_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) req->flags &= ~REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) kfree(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) return cflags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) return io_put_kbuf(req, kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) static inline bool io_run_task_work(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * Not safe to run on an exiting task, and the task_work handling will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) * not add work to such a task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) if (unlikely(current->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) if (current->task_works) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) __set_current_state(TASK_RUNNING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) task_work_run();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static void io_iopoll_queue(struct list_head *again)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) req = list_first_entry(again, struct io_kiocb, inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) __io_complete_rw(req, -EAGAIN, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) } while (!list_empty(again));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) * Find and free completed poll iocbs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) struct list_head *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) struct req_batch rb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) LIST_HEAD(again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) /* order with ->result store in io_complete_rw_iopoll() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) io_init_req_batch(&rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) while (!list_empty(done)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) int cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) req = list_first_entry(done, struct io_kiocb, inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) if (READ_ONCE(req->result) == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) req->result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) req->iopoll_completed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) list_move_tail(&req->inflight_entry, &again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) cflags = io_put_rw_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) __io_cqring_fill_event(req, req->result, cflags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) (*nr_events)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) if (refcount_dec_and_test(&req->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) io_req_free_batch(&rb, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) if (ctx->flags & IORING_SETUP_SQPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) io_req_free_batch_finish(ctx, &rb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) if (!list_empty(&again))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) io_iopoll_queue(&again);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) struct io_kiocb *req, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) LIST_HEAD(done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) bool spin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) * Only spin for completions if we don't have multiple devices hanging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) * off our complete list, and we're under the requested event count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) spin = !ctx->poll_multi_file && *nr_events < min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) * Move completed and retryable entries to our local lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) * If we find a request that requires polling, break out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) * and complete those lists first, if we have entries there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) if (READ_ONCE(req->iopoll_completed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) list_move_tail(&req->inflight_entry, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) if (!list_empty(&done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) /* iopoll may have completed current req */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) if (READ_ONCE(req->iopoll_completed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) list_move_tail(&req->inflight_entry, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) if (ret && spin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) spin = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) if (!list_empty(&done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) io_iopoll_complete(ctx, nr_events, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) * non-spinning poll check - we'll still enter the driver poll loop, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) * as a non-spinning completion check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) ret = io_do_iopoll(ctx, nr_events, min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) if (*nr_events >= min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) * We can't just wait for polled events to come to us, we have to actively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) * find and complete them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) if (!(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) while (!list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) unsigned int nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) io_do_iopoll(ctx, &nr_events, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) /* let it sleep and repeat later if we can't complete a request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) if (nr_events == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) * Ensure we allow local-to-the-cpu processing to take place;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) * in this case we need to ensure that we reap all events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) * Also let task_work, etc. make progress by releasing the mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500)
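/*
 * Reap IOPOLL completions on behalf of the application. The poll loop runs
 * with uring_lock held, bails out early if CQEs are already pending, and
 * periodically drops the lock so that punted submissions and task_work can
 * make progress.
 */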
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) unsigned int nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) int iters = 0, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) * We disallow the app entering submit/complete with polling, but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) * still need to lock the ring to prevent racing with polled issue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) * that got punted to a workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * Don't enter the poll loop if we already have events pending.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) * If we do, we could end up spinning for commands that already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) * triggered a CQE (e.g. in error).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) if (test_bit(0, &ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) __io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) if (io_cqring_events(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) * If a submit got punted to a workqueue, we can have the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) * application entering polling for a command before it gets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) * issued. That app will hold the uring_lock for the duration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) * of the poll right here, so we need to take a breather every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) * now and then to ensure that the issue has a chance to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) * the poll to the issued list. Otherwise we can spin here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) * forever, while the workqueue is stuck trying to acquire the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) * very same mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) if (!(++iters & 7)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) ret = io_iopoll_getevents(ctx, &nr_events, min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) } while (min && !nr_events && !need_resched());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)
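/* Release the superblock write protection held for a write to req->file. */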
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) static void kiocb_end_write(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * Tell lockdep we inherited freeze protection from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * submission thread.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) struct inode *inode = file_inode(req->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) file_end_write(req->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
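/*
 * Common completion path for reads and writes: end write protection if
 * needed, fail the link chain on a short or errored result, return any
 * selected buffer, and post the completion.
 */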
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void io_complete_rw_common(struct kiocb *kiocb, long res,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) int cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) if (kiocb->ki_flags & IOCB_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) kiocb_end_write(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) if (res != req->result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) cflags = io_put_rw_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) __io_req_complete(req, res, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) #ifdef CONFIG_BLOCK
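/*
 * Prepare a read/write request for resubmission: with no error passed in,
 * re-import the iovec and set up async state so the request can be
 * requeued. Returns false, failing the request's links, if it can't be
 * retried.
 */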
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) static bool io_resubmit_prep(struct io_kiocb *req, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) ssize_t ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) struct iov_iter iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) if (error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) ret = error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) rw = READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) rw = WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) req->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) ret = io_import_iovec(rw, req, &iovec, &iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) goto end_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) end_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625)
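/*
 * Decide whether a block or regular-file IO that failed with -EAGAIN (or
 * -EOPNOTSUPP) should be retried from the async workqueue instead of being
 * completed with an error. Returns true if the request was requeued.
 */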
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) static bool io_rw_reissue(struct io_kiocb *req, long res)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) umode_t mode = file_inode(req->file)->i_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) if (!S_ISBLK(mode) && !S_ISREG(mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) * If ref is dying, we might be running poll reap from the exit work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) * Don't attempt to reissue from that path, just let it fail with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) * -EAGAIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) if (percpu_ref_is_dying(&req->ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) ret = io_sq_thread_acquire_mm(req->ctx, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) if (io_resubmit_prep(req, ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) if (!io_rw_reissue(req, res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) io_complete_rw_common(&req->rw.kiocb, res, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) __io_complete_rw(req, res, res2, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669)
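/*
 * Completion callback for IOPOLL requests: don't post a CQE here, just
 * record the result and mark the request completed so the iopoll reaping
 * loop picks it up.
 */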
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) if (kiocb->ki_flags & IOCB_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) kiocb_end_write(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) if (res != -EAGAIN && res != req->result)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) WRITE_ONCE(req->result, res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) /* order with io_poll_complete() checking ->result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) WRITE_ONCE(req->iopoll_completed, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) * After the iocb has been issued, it's safe for it to be found on the poll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) * list. Adding the kiocb to the list AFTER submission ensures that we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * find it from an io_iopoll_getevents() thread before the issuer is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) * accessing the kiocb cookie.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) static void io_iopoll_req_issued(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) * Track whether we have multiple files in our lists. This will impact
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) * how we do polling later on: we won't spin if the requests may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) * targeting different devices.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) if (list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) ctx->poll_multi_file = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) } else if (!ctx->poll_multi_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) struct io_kiocb *list_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) if (list_req->file != req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) ctx->poll_multi_file = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) * For fast devices, IO may have already completed. If it has, add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) * it to the front so we find it first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) if (READ_ONCE(req->iopoll_completed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) list_add(&req->inflight_entry, &ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) list_add_tail(&req->inflight_entry, &ctx->iopoll_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) if ((ctx->flags & IORING_SETUP_SQPOLL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) wq_has_sleeper(&ctx->sq_data->wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725)
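/* Return any unused file references cached in the submission state. */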
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) static void __io_state_file_put(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) if (state->has_refs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) fput_many(state->file, state->has_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) state->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) static inline void io_state_file_put(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) if (state->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) __io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) * Get as many references to a file as we have IOs left in this submission,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) * assuming most submissions are for one file, or at least that each file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) * has more than one submission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) static struct file *__io_file_get(struct io_submit_state *state, int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) if (!state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) return fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) if (state->file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) if (state->fd == fd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) state->has_refs--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) return state->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) __io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) state->file = fget_many(fd, state->ios_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) if (!state->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) state->fd = fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) state->has_refs = state->ios_left - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) return state->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764)
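/*
 * Check whether IO can be issued to this block device without blocking,
 * i.e. whether its request queue advertises nowait support.
 */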
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) static bool io_bdev_nowait(struct block_device *bdev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) #ifdef CONFIG_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) * If we tracked the file through the SCM inflight mechanism, we could support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) * any file. For now, just ensure that anything potentially problematic is done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) * inline.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) static bool io_file_supports_async(struct file *file, int rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) umode_t mode = file_inode(file)->i_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) if (S_ISBLK(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) if (io_bdev_nowait(file->f_inode->i_bdev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) if (S_ISSOCK(mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) if (S_ISREG(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) /* any ->read/write should understand O_NONBLOCK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) if (file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) if (!(file->f_mode & FMODE_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) if (rw == READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) return file->f_op->read_iter != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) return file->f_op->write_iter != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809)
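/*
 * Set up the kiocb for a read/write request from the SQE: file position,
 * rw flags, ioprio and the completion handler. IOPOLL rings additionally
 * require direct IO and a ->iopoll() capable file.
 */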
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) unsigned ioprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) if (S_ISREG(file_inode(req->file)->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) req->flags |= REQ_F_ISREG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) kiocb->ki_pos = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) if (kiocb->ki_pos == -1 && !(req->file->f_mode & FMODE_STREAM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) req->flags |= REQ_F_CUR_POS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) kiocb->ki_pos = req->file->f_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) ioprio = READ_ONCE(sqe->ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) if (ioprio) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) ret = ioprio_check_cap(ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) kiocb->ki_ioprio = ioprio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) kiocb->ki_ioprio = get_current_ioprio();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) /* don't allow async punt if RWF_NOWAIT was requested */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) if (kiocb->ki_flags & IOCB_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (!(kiocb->ki_flags & IOCB_DIRECT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) !kiocb->ki_filp->f_op->iopoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) kiocb->ki_flags |= IOCB_HIPRI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) kiocb->ki_complete = io_complete_rw_iopoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) req->iopoll_completed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) if (kiocb->ki_flags & IOCB_HIPRI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) kiocb->ki_complete = io_complete_rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) req->rw.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) req->rw.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) req->buf_index = READ_ONCE(sqe->buf_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864)
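/*
 * Hand the final result to the kiocb completion handler, turning the
 * restart-family errors into -EINTR since the sqe can't simply be
 * reissued.
 */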
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) case -EIOCBQUEUED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) case -ERESTARTSYS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) case -ERESTARTNOINTR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) case -ERESTARTNOHAND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) case -ERESTART_RESTARTBLOCK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) * We can't just restart the syscall, since previously
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) * submitted sqes may already be in progress. Just fail this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) * IO with EINTR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) kiocb->ki_complete(kiocb, ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885)
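/*
 * Finish a read/write from the issue path: fold in bytes already done by a
 * previous partial attempt, update f_pos for current-position requests,
 * then either complete the request directly or defer to io_rw_done().
 */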
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) struct io_async_rw *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) /* add previously done IO, if any */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) if (io && io->bytes_done > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) ret = io->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) ret += io->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) if (req->flags & REQ_F_CUR_POS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) req->file->f_pos = kiocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) __io_complete_rw(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) io_rw_done(kiocb, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
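/*
 * Map a fixed (pre-registered) buffer IO onto the registered region:
 * validate that [addr, addr + len) lies inside it and build a bvec
 * iterator, skipping whole page-sized bvecs up front rather than walking
 * them with iov_iter_advance().
 */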
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) size_t len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) struct io_mapped_ubuf *imu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) u16 index, buf_index = req->buf_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) size_t offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) u64 buf_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) if (unlikely(buf_index >= ctx->nr_user_bufs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) index = array_index_nospec(buf_index, ctx->nr_user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) imu = &ctx->user_bufs[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) buf_addr = req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) /* overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) if (buf_addr + len < buf_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) /* not inside the mapped region */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) * May not be the start of the buffer; set the size appropriately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) * and advance to the beginning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) offset = buf_addr - imu->ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) if (offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) * Don't use iov_iter_advance() here, as it's really slow for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) * using the latter parts of a big fixed buffer - it iterates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) * over each segment manually. We can cheat a bit here, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * we know that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) * 1) it's a BVEC iter, we set it up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) * 2) all bvecs are PAGE_SIZE in size, except potentially the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) * first and last bvec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) * So just find our index, and adjust the iterator afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) * If the offset is within the first bvec (or covers the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) * first bvec), just use iov_iter_advance(). This makes it easier
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) * since we can just skip the first segment, which may not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) * be PAGE_SIZE aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) const struct bio_vec *bvec = imu->bvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) if (offset <= bvec->bv_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) iov_iter_advance(iter, offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) unsigned long seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) /* skip first vec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) offset -= bvec->bv_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) seg_skip = 1 + (offset >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) iter->bvec = bvec + seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) iter->nr_segs -= seg_skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) iter->count -= bvec->bv_len + offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) iter->iov_offset = offset & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) if (needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) * "Normal" inline submissions always hold the uring_lock, since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) * grab it from the system call. Same is true for the SQPOLL offload.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) * The only exception is when we've detached the request and issue it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * from an async worker thread; grab the lock for that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) if (needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
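/*
 * Pick a provided buffer from group 'bgid' for a buffer-select request,
 * clamping *len to the buffer's size. Returns ERR_PTR(-ENOBUFS) if the
 * group is empty or doesn't exist.
 */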
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) int bgid, struct io_buffer *kbuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) struct io_buffer *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) io_ring_submit_lock(req->ctx, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) lockdep_assert_held(&req->ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) head = xa_load(&req->ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) if (head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) if (!list_empty(&head->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) kbuf = list_last_entry(&head->list, struct io_buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) list_del(&kbuf->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) kbuf = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) xa_erase(&req->ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) if (*len > kbuf->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) *len = kbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) kbuf = ERR_PTR(-ENOBUFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) io_ring_submit_unlock(req->ctx, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027)
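/*
 * Resolve a selected buffer for a non-vectored read/write and return its
 * user address, stashing the io_buffer in req->rw.addr so completion can
 * hand it back.
 */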
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) u16 bgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) bgid = req->buf_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) req->rw.addr = (u64) (unsigned long) kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) req->flags |= REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) return u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) struct compat_iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) compat_ssize_t clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) void __user *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) ssize_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) uiov = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) if (!access_ok(uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) if (__get_user(clen, &uiov->iov_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) if (clen < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) buf = io_rw_buffer_select(req, &len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) iov[0].iov_base = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) iov[0].iov_len = (compat_size_t) len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) void __user *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) ssize_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) if (copy_from_user(iov, uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) len = iov[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) buf = io_rw_buffer_select(req, &len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) iov[0].iov_base = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) iov[0].iov_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) if (req->flags & REQ_F_BUFFER_SELECTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) iov[0].iov_len = kbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) if (req->rw.len != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) return io_compat_import(req, iov, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) return __io_iov_buffer_select(req, iov, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113)
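/*
 * Build the iov_iter for a read/write request: fixed-buffer opcodes go
 * through io_import_fixed(), non-vectored READ/WRITE (possibly with a
 * selected buffer) become a single range, and vectored requests import the
 * user iovec, honouring compat tasks.
 */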
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) void __user *buf = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) size_t sqe_len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) u8 opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) opcode = req->opcode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) *iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) return io_import_fixed(req, rw, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) /* buffer index only valid with fixed read/write, or buffer select */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) if (IS_ERR(buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) return PTR_ERR(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) req->rw.len = sqe_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) *iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) ret = io_iov_buffer_select(req, *iovec, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) ret = (*iovec)->iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) iov_iter_init(iter, rw, *iovec, 1, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) *iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) req->ctx->compat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159)
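/* Import the iovec unless async data already holds a prepared iterator. */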
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) struct iovec **iovec, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) struct io_async_rw *iorw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) if (!iorw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) return __io_import_iovec(rw, req, iovec, iter, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) *iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * For files that don't have ->read_iter() or ->write_iter(), handle them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) * by looping over ->read() or ->write() manually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) struct file *file = req->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) ssize_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) * We don't support polled IO through this interface, and we can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) * support non-blocking either. For the latter, this just causes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) * the kiocb to be handled from an async context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) if (kiocb->ki_flags & IOCB_HIPRI)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) if (kiocb->ki_flags & IOCB_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) while (iov_iter_count(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) struct iovec iovec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) ssize_t nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) if (!iov_iter_is_bvec(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) iovec = iov_iter_iovec(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) iovec.iov_base = u64_to_user_ptr(req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) iovec.iov_len = req->rw.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) if (rw == READ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) nr = file->f_op->read(file, iovec.iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) iovec.iov_len, io_kiocb_ppos(kiocb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) nr = file->f_op->write(file, iovec.iov_base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) iovec.iov_len, io_kiocb_ppos(kiocb));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) if (nr < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) ret = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) ret += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) if (!iov_iter_is_bvec(iter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) iov_iter_advance(iter, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) req->rw.addr += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) req->rw.len -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) if (!req->rw.len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) if (nr != iovec.iov_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
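/*
 * Snapshot the current iterator state into the request's async data so the
 * IO can be retried from another context, copying the inline iovecs when
 * they are in use.
 */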
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) const struct iovec *fast_iov, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) memcpy(&rw->iter, iter, sizeof(*iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) rw->free_iovec = iovec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) rw->bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) /* can only be fixed buffers, no need to do anything */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) if (iov_iter_is_bvec(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) if (!iovec) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) unsigned iov_off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) rw->iter.iov = rw->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) if (iter->iov != fast_iov) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) iov_off = iter->iov - fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) rw->iter.iov += iov_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) if (rw->fast_iov != fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) memcpy(rw->fast_iov + iov_off, fast_iov + iov_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) sizeof(struct iovec) * iter->nr_segs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) static inline int __io_alloc_async_data(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) WARN_ON_ONCE(!io_op_defs[req->opcode].async_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) return req->async_data == NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) static int io_alloc_async_data(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) if (!io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) return __io_alloc_async_data(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) const struct iovec *fast_iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) struct iov_iter *iter, bool force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) if (!force && !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) if (__io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) io_req_map_rw(req, iovec, fast_iov, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) struct io_async_rw *iorw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) struct iovec *iov = iorw->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) ret = __io_import_iovec(rw, req, &iov, &iorw->iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) if (unlikely(ret < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) iorw->bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) iorw->free_iovec = iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) if (iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) ret = io_prep_rw(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) if (unlikely(!(req->file->f_mode & FMODE_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) /* either don't need iovec imported or already have it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) if (!req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) return io_rw_prep_async(req, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) * This is our waitqueue callback handler, registered through lock_page_async()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) * when we initially tried to do the IO with the iocb and armed our waitqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) * This gets called when the page is unlocked, and we generally expect that to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) * happen when the page IO is completed and the page is now uptodate. This will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) * queue a task_work based retry of the operation, attempting to copy the data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) * again. If the latter fails because the page was NOT uptodate, then we will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) * do a thread-based blocking retry of the operation. That's the unexpected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) * slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) int sync, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) struct wait_page_queue *wpq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) struct wait_page_key *key = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) wpq = container_of(wait, struct wait_page_queue, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) if (!wake_page_match(wpq, key))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) init_task_work(&req->task_work, io_req_task_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) /* submit ref gets dropped, acquire a new one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) ret = io_req_task_work_add(req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) /* queue just for cancelation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) init_task_work(&req->task_work, io_req_task_cancel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) }
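
/*
 * For readers less familiar with the generic wait API: a wait_queue_entry
 * with a custom wake function like the one above is normally wired up
 * along these lines (illustrative sketch only; io_rw_should_retry() below
 * open-codes roughly the same initialisation by assigning the fields
 * directly):
 *
 *	struct wait_queue_entry wqe;
 *
 *	init_waitqueue_func_entry(&wqe, io_async_buf_func);
 *	wqe.private = req;
 *	// a later wake-up on the queue then invokes io_async_buf_func()
 *	// with the wake mode and the wait_page_key as 'arg'
 */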
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) * This controls whether a given IO request should be armed for async page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) * based retry. If we return false here, the request is handed to the async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) * worker threads for retry. If we're doing buffered reads on a regular file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) * we prepare a private wait_page_queue entry and retry the operation. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) * will either succeed because the page is now uptodate and unlocked, or it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) * will register a callback when the page is unlocked at IO completion. Through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) * that callback, io_uring uses task_work to set up a retry of the operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) * That retry will attempt the buffered read again. The retry will generally
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) * succeed, or in rare cases where it fails, we then fall back to using the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) * async worker threads for a blocking retry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) static bool io_rw_should_retry(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) struct wait_page_queue *wait = &rw->wpq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) /* never retry for NOWAIT, we just complete with -EAGAIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) if (req->flags & REQ_F_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) /* Only for buffered IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) * just use poll if we can, and don't attempt if the fs doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) * support callback-based unlocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) wait->wait.func = io_async_buf_func;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) wait->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) wait->wait.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) INIT_LIST_HEAD(&wait->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) kiocb->ki_flags |= IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) kiocb->ki_waitq = wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) }
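
/*
 * FMODE_BUF_RASYNC is opted into by the filesystem, typically from its
 * ->open() hook. A hypothetical example of the pattern (illustrative
 * sketch only; "foofs" is made up, not a quote from any real filesystem):
 *
 *	static int foofs_file_open(struct inode *inode, struct file *filp)
 *	{
 *		filp->f_mode |= FMODE_BUF_RASYNC;
 *		return generic_file_open(inode, filp);
 *	}
 */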
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) if (req->file->f_op->read_iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) return call_read_iter(req->file, &req->rw.kiocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) else if (req->file->f_op->read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) return loop_rw_iter(READ, req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) static int io_read(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) struct iov_iter __iter, *iter = &__iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) ssize_t io_size, ret, ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) bool no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) if (rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) io_size = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) req->result = io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) /* Ensure we clear previously set non-block flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) if (!force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) kiocb->ki_flags |= IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) /* If the file doesn't support async, just async punt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) no_async = force_nonblock && !io_file_supports_async(req->file, READ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) if (no_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) ret = io_iter_do_read(req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) } else if (ret == -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) } else if (ret == -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) /* IOPOLL retry should happen for io-wq threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) /* no retry on NONBLOCK marked file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) if (req->file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) /* some cases will consume bytes even on error returns */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) iov_iter_revert(iter, io_size - iov_iter_count(iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) } else if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) /* make sure -ERESTARTSYS -> -EINTR is done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) /* read it all, or we did a blocking attempt. No retry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) if (!iov_iter_count(iter) || !force_nonblock ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) io_size -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) copy_iov:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) if (ret2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) ret = ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) if (no_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) /* it's copied and will be cleaned with ->io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) iovec = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) /* now use our persistent iterator, if we aren't already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) rw->bytes_done += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) /* if we can retry, do so with the callbacks armed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) if (!io_rw_should_retry(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) kiocb->ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) * Now retry read with the IOCB_WAITQ parts set in the iocb. If we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) * get -EIOCBQUEUED, then we'll get a notification when the desired
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) * page gets unlocked. We can also get a partial read here, and if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) * do, then just retry at the new offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) ret = io_iter_do_read(req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) if (ret == -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) } else if (ret > 0 && ret < io_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) /* we got some bytes, but not all. retry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) kiocb->ki_flags &= ~IOCB_WAITQ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) kiocb_done(kiocb, ret, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) /* it's reportedly faster than delegating the null check to kfree() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) if (iovec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) }
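
/*
 * None of the -EAGAIN/IOCB_WAITQ retry handling above is visible from
 * userspace; a buffered read simply completes in its CQE. A minimal
 * liburing sketch (illustrative only; error handling trimmed and the
 * file name made up; see also the liburing examples referenced at the
 * top of this file):
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	char buf[4096];
 *	int fd = open("data.bin", O_RDONLY);
 *
 *	io_uring_queue_init(8, &ring, 0);
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	// cqe->res is the number of bytes read, or a negative errno
 *	io_uring_cqe_seen(&ring, cqe);
 *	io_uring_queue_exit(&ring);
 */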
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) ret = io_prep_rw(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) /* either don't need iovec imported or already have it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) if (!req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) return io_rw_prep_async(req, WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) static int io_write(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) struct kiocb *kiocb = &req->rw.kiocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) struct iov_iter __iter, *iter = &__iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) struct io_async_rw *rw = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) ssize_t ret, ret2, io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) if (rw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) iter = &rw->iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) io_size = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) req->result = io_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) /* Ensure we clear previously set non-block flag */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) if (!force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) kiocb->ki_flags &= ~IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) kiocb->ki_flags |= IOCB_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) /* If the file doesn't support async, just async punt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) if (force_nonblock && !io_file_supports_async(req->file, WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) /* file path doesn't support NOWAIT for non-direct IO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) (req->flags & REQ_F_ISREG))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) * Open-code file_start_write here to grab freeze protection,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) * which will be released by another thread in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) * io_complete_rw(). Fool lockdep by telling it the lock got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) * released so that it doesn't complain about the held lock when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) * we return to userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) if (req->flags & REQ_F_ISREG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) sb_start_write(file_inode(req->file)->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) __sb_writers_release(file_inode(req->file)->i_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) SB_FREEZE_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) kiocb->ki_flags |= IOCB_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) if (req->file->f_op->write_iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) ret2 = call_write_iter(req->file, kiocb, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) else if (req->file->f_op->write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) ret2 = loop_rw_iter(WRITE, req, iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) ret2 = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) * retry them without IOCB_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) ret2 = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) /* no retry on NONBLOCK marked file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) if (ret2 == -EAGAIN && (req->file->f_flags & O_NONBLOCK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) if (!force_nonblock || ret2 != -EAGAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) /* IOPOLL retry should happen for io-wq threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) goto copy_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) kiocb_done(kiocb, ret2, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) copy_iov:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) /* some cases will consume bytes even on error returns */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) iov_iter_revert(iter, io_size - iov_iter_count(iter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) /* it's reportedly faster than delegating the null check to kfree() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) if (iovec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) kfree(iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) }
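
/*
 * The write side looks identical from userspace; note that, per the
 * checks above, a buffered (non-O_DIRECT) write to a regular file never
 * takes the nowait path and is punted to the io-wq workers instead.
 * Minimal liburing sketch, reusing the ring from the read sketch above
 * and assuming fd was opened for writing (illustrative only):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_write(sqe, fd, buf, sizeof(buf), 0);
 *	io_uring_submit(&ring);
 */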
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) static int __io_splice_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) sp->file_in = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) sp->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) sp->flags = READ_ONCE(sqe->splice_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) if (unlikely(sp->flags & ~valid_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) sp->file_in = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (!sp->file_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) * The splice operation will be punted async, and we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) * modify io_wq_work.flags here, so initialize io_wq_work first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) req->work.flags |= IO_WQ_WORK_UNBOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) static int io_tee_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) return __io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) static int io_tee(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) struct file *in = sp->file_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) struct file *out = sp->file_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) if (sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) ret = do_tee(in, out, sp->len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) if (ret != sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) sp->off_in = READ_ONCE(sqe->splice_off_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) sp->off_out = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) return __io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) static int io_splice(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) struct io_splice *sp = &req->splice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) struct file *in = sp->file_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) struct file *out = sp->file_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) loff_t *poff_in, *poff_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) if (sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) if (ret != sp->len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) }
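
/*
 * Userspace drives this through IORING_OP_SPLICE; liburing wraps the sqe
 * setup in io_uring_prep_splice(). A minimal sketch moving up to 4096
 * bytes from a pipe into a file (illustrative only; the fds are made up,
 * and an offset of -1 means "no offset", as required for the pipe end):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_splice(sqe, pipe_rd, -1, file_fd, 0, 4096, 0);
 *	io_uring_submit(&ring);
 */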
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) * IORING_OP_NOP just posts a completion event, nothing else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) static int io_nop(struct io_kiocb *req, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) __io_req_complete(req, 0, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) }
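
/*
 * IORING_OP_NOP is mostly useful for testing ring setup and for measuring
 * raw submission/completion overhead. Minimal liburing sketch
 * (illustrative only):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);	// cqe->res == 0
 *	io_uring_cqe_seen(&ring, cqe);
 */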
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) if (!req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) req->sync.flags = READ_ONCE(sqe->fsync_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) req->sync.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) static int io_fsync(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) loff_t end = req->sync.off + req->sync.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) /* fsync always requires a blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) ret = vfs_fsync_range(req->file, req->sync.off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) end > 0 ? end : LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) req->sync.flags & IORING_FSYNC_DATASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) }
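
/*
 * The sqe->off/sqe->len pair above allows syncing a sub-range of the
 * file; when both are zero the range extends to LLONG_MAX, i.e. the
 * whole file. A minimal liburing sketch of a whole-file datasync
 * (illustrative only):
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_fsync(sqe, fd, IORING_FSYNC_DATASYNC);
 *	io_uring_submit(&ring);
 */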
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) static int io_fallocate_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) if (sqe->ioprio || sqe->buf_index || sqe->rw_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) req->sync.len = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) req->sync.mode = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) /* fallocate always requires a blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) req->sync.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) }
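
/*
 * Note the slightly unusual sqe layout used by io_fallocate_prep() above:
 * the fallocate mode travels in sqe->len and the length in sqe->addr. A
 * raw sqe preallocating 1 MiB at offset 0 would therefore look roughly
 * like this (illustrative sketch; liburing's io_uring_prep_fallocate()
 * helper hides this detail):
 *
 *	sqe->opcode = IORING_OP_FALLOCATE;
 *	sqe->fd     = fd;
 *	sqe->off    = 0;		// offset
 *	sqe->addr   = 1024 * 1024;	// length
 *	sqe->len    = 0;		// mode, 0 == default allocation
 */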
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) const char __user *fname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) if (unlikely(sqe->ioprio || sqe->buf_index || sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) if (unlikely(req->flags & REQ_F_FIXED_FILE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) /* open.how should already be initialised */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) if (!(req->open.how.flags & O_PATH) && force_o_largefile())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) req->open.how.flags |= O_LARGEFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) req->open.dfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) req->open.filename = getname(fname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) if (IS_ERR(req->open.filename)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) ret = PTR_ERR(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) req->open.filename = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) req->open.nofile = rlimit(RLIMIT_NOFILE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) req->open.ignore_nonblock = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) u64 flags, mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) mode = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) flags = READ_ONCE(sqe->open_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) req->open.how = build_open_how(flags, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) return __io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) struct open_how __user *how;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) if (len < OPEN_HOW_SIZE_VER0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) return __io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) static int io_openat2(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) struct open_flags op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) if (force_nonblock && !req->open.ignore_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) ret = build_open_flags(&req->open.how, &op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) file = do_filp_open(req->open.dfd, req->open.filename, &op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) put_unused_fd(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) ret = PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) * A work-around to ensure that /proc/self works the way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) * it should - if we get -EOPNOTSUPP back, then assume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) * that proc_self_get_link() failed us because we're in async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) * context. We should be safe to retry this from the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) * itself with force_nonblock == false set, as it should not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) * block on lookup. Would be nice to know this upfront and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) * avoid the async dance, but doesn't seem feasible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) req->open.ignore_nonblock = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) io_req_task_queue(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) fsnotify_open(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) fd_install(ret, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) putname(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) static int io_openat(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) return io_openat2(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) }
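
/*
 * IORING_OP_OPENAT builds its open_how via build_open_how() above, while
 * IORING_OP_OPENAT2 copies a struct open_how straight from userspace. A
 * minimal liburing sketch of the latter (illustrative only; the path is
 * made up):
 *
 *	struct open_how how = { .flags = O_RDONLY, .resolve = RESOLVE_IN_ROOT };
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_openat2(sqe, AT_FDCWD, "data.bin", &how);
 *	io_uring_submit(&ring);
 *	// cqe->res is the new file descriptor, or a negative errno
 */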
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) static int io_remove_buffers_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) tmp = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) if (!tmp || tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) memset(p, 0, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) p->nbufs = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) p->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) int bgid, unsigned nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) unsigned i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) /* shouldn't happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) if (!nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) /* the head kbuf is the list itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) while (!list_empty(&buf->list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) struct io_buffer *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) nxt = list_first_entry(&buf->list, struct io_buffer, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) list_del(&nxt->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) kfree(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) if (++i == nbufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) xa_erase(&ctx->io_buffers, bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) struct io_buffer *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) io_ring_submit_lock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) lockdep_assert_held(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) head = xa_load(&ctx->io_buffers, p->bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) if (head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) /* need to hold the lock to complete IOPOLL requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) static int io_provide_buffers_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) unsigned long size, tmp_check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) u64 tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) tmp = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) if (!tmp || tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) p->nbufs = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) p->addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) p->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) &size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) size = (unsigned long)p->len * p->nbufs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) if (!access_ok(u64_to_user_ptr(p->addr), size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) p->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) tmp = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) if (tmp > USHRT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) return -E2BIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) p->bid = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050)
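/*
 * Allocate and link up to pbuf->nbufs struct io_buffer entries, starting at
 * pbuf->addr / pbuf->bid and advancing both for each buffer. Each entry's
 * length is capped at MAX_RW_COUNT. Returns the number of buffers added, or
 * -ENOMEM if not even one could be allocated; a partial result is not
 * treated as an error.
 */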
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) struct io_buffer *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) u64 addr = pbuf->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) int i, bid = pbuf->bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) for (i = 0; i < pbuf->nbufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) buf->addr = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) buf->bid = bid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) addr += pbuf->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) bid++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) if (!*head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) INIT_LIST_HEAD(&buf->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) *head = buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) list_add_tail(&buf->list, &(*head)->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) return i ? i : -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078)
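/*
 * IORING_OP_PROVIDE_BUFFERS: append the newly allocated buffers to the
 * group's list, registering the group in ctx->io_buffers if it did not
 * exist yet. If registering a brand new group fails, the buffers that were
 * just added are torn down again. From userspace this is typically driven
 * through liburing, e.g. io_uring_prep_provide_buffers(sqe, addr, len,
 * nbufs, bgid, bid) (liburing helper named here for illustration).
 */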
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) struct io_provide_buf *p = &req->pbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) struct io_buffer *head, *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) io_ring_submit_lock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) lockdep_assert_held(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) list = head = xa_load(&ctx->io_buffers, p->bgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) ret = io_add_buffers(p, &head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) if (ret >= 0 && !list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) __io_remove_buffers(ctx, head, p->bgid, -1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) /* need to hold the lock to complete IOPOLL requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) if (ctx->flags & IORING_SETUP_IOPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) io_ring_submit_unlock(ctx, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112)
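/*
 * Prepare an epoll_ctl request: epfd comes from sqe->fd, the EPOLL_CTL_*
 * op from sqe->len and the target fd from sqe->off. For ops that take an
 * event (see ep_op_has_event()), the struct epoll_event is copied in from
 * the user pointer in sqe->addr at prep time.
 */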
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) static int io_epoll_ctl_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) #if defined(CONFIG_EPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) req->epoll.epfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) req->epoll.op = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) req->epoll.fd = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) if (ep_op_has_event(req->epoll.op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) struct epoll_event __user *ev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) #if defined(CONFIG_EPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) struct io_epoll *ie = &req->epoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) req->madvise.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) req->madvise.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) req->madvise.advice = READ_ONCE(sqe->fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) static int io_madvise(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) struct io_madvise *ma = &req->madvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) req->fadvise.offset = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) req->fadvise.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) req->fadvise.advice = READ_ONCE(sqe->fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) struct io_fadvise *fa = &req->fadvise;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) if (force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) switch (fa->advice) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) case POSIX_FADV_NORMAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) case POSIX_FADV_RANDOM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) case POSIX_FADV_SEQUENTIAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) if (req->flags & REQ_F_FIXED_FILE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) req->statx.dfd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) req->statx.mask = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) req->statx.flags = READ_ONCE(sqe->statx_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) static int io_statx(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) struct io_statx *ctx = &req->statx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) if (force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) /* only need file table for an actual valid fd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) if (ctx->dfd == -1 || ctx->dfd == AT_FDCWD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) req->flags |= REQ_F_NO_FILE_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) ctx->buffer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) * If we queue this for async, it must not be cancellable. That would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) * leave the 'file' in an indeterminate state, and we need to modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) * io_wq_work.flags here, so initialize io_wq_work first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) if (req->flags & REQ_F_FIXED_FILE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) req->close.fd = READ_ONCE(sqe->fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) if ((req->file && req->file->f_op == &io_uring_fops))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) req->close.put_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) static int io_close(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) struct io_close *close = &req->close;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) /* might already have been done during nonblock submission */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) if (!close->put_file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) ret = __close_fd_get_file(close->fd, &close->put_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) return (ret == -ENOENT) ? -EBADF : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) /* if the file has a flush method, be safe and punt to async */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) if (close->put_file->f_op->flush && force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) /* not safe to cancel at this point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) req->work.flags |= IO_WQ_WORK_NO_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) /* was never set, but play safe */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) req->flags &= ~REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) /* avoid grabbing files - we don't need them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) req->flags |= REQ_F_NO_FILE_TABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) /* No ->flush() or already async, safely close from here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) ret = filp_close(close->put_file, req->work.identity->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) fput(close->put_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) close->put_file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) if (!req->file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) sqe->splice_fd_in))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) req->sync.off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) req->sync.len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) req->sync.flags = READ_ONCE(sqe->sync_range_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) /* sync_file_range always requires a blocking context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) ret = sync_file_range(req->file, req->sync.off, req->sync.len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) req->sync.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) #if defined(CONFIG_NET)
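/*
 * Punt a sendmsg/recvmsg to async context: copy the on-stack msghdr state
 * into req->async_data so the retry from io-wq sees the already-parsed
 * header and iovec. Always returns -EAGAIN (or -ENOMEM on allocation
 * failure) so the caller requeues the request.
 */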
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) static int io_setup_async_msg(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) struct io_async_msghdr *kmsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) if (async_msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) if (io_alloc_async_data(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) memcpy(async_msg, kmsg, sizeof(*kmsg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) static int io_sendmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) iomsg->iov = iomsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) iomsg->msg.msg_name = &iomsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) req->sr_msg.msg_flags, &iomsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) sr->msg_flags = READ_ONCE(sqe->msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) sr->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) sr->msg_flags |= MSG_CMSG_COMPAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) ret = io_sendmsg_copy_hdr(req, async_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) struct io_async_msghdr iomsg, *kmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) kmsg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) kmsg->msg.msg_name = &kmsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) /* if iov is set, it's allocated already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) if (!kmsg->iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) kmsg->iov = kmsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) kmsg->msg.msg_iter.iov = kmsg->iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) ret = io_sendmsg_copy_hdr(req, &iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) kmsg = &iomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) min_ret = iov_iter_count(&kmsg->msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) return io_setup_async_msg(req, kmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) if (ret < min_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) static int io_send(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) struct msghdr msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) msg.msg_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) msg.msg_control = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) msg.msg_controllen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) msg.msg_namelen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) min_ret = iov_iter_count(&msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) msg.msg_flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) ret = sock_sendmsg(sock, &msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) if (ret < min_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515)
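/*
 * Copy the recvmsg header from userspace. With IOSQE_BUFFER_SELECT only a
 * single iovec is allowed, since the actual buffer is picked from the
 * buffer group at execution time; otherwise the full iovec array is
 * imported (using the inline fast_iov where possible).
 */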
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) struct iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) size_t iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) &iomsg->uaddr, &uiov, &iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) if (iov_len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) if (copy_from_user(iomsg->iov, uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) sr->len = iomsg->iov[0].iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) iov_iter_init(&iomsg->msg.msg_iter, READ, iomsg->iov, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) sr->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) iomsg->iov = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) &iomsg->iov, &iomsg->msg.msg_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) struct compat_msghdr __user *msg_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) struct compat_iovec __user *uiov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) compat_uptr_t ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) compat_size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) msg_compat = (struct compat_msghdr __user *) sr->umsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) ret = __get_compat_msghdr(&iomsg->msg, msg_compat, &iomsg->uaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) &ptr, &len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) uiov = compat_ptr(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) compat_ssize_t clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) if (len > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) if (!access_ok(uiov, sizeof(*uiov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) if (__get_user(clen, &uiov->iov_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) if (clen < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) sr->len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) iomsg->iov[0].iov_len = clen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) iomsg->iov = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) UIO_FASTIOV, &iomsg->iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) &iomsg->msg.msg_iter, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) static int io_recvmsg_copy_hdr(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) struct io_async_msghdr *iomsg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) iomsg->msg.msg_name = &iomsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) iomsg->iov = iomsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) return __io_compat_recvmsg_copy_hdr(req, iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) return __io_recvmsg_copy_hdr(req, iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606)
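/*
 * For buffer-select receives, pick a buffer from the request's group and
 * record it in sr->kbuf; REQ_F_BUFFER_SELECTED marks that it must be
 * returned (and its ID reported via the CQE flags) on completion.
 */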
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) bool needs_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) sr->kbuf = kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) req->flags |= REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) return kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) return io_put_kbuf(req, req->sr_msg.kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) static int io_recvmsg_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) struct io_async_msghdr *async_msg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) sr->msg_flags = READ_ONCE(sqe->msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) sr->len = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) sr->bgid = READ_ONCE(sqe->buf_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) if (req->ctx->compat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) sr->msg_flags |= MSG_CMSG_COMPAT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) ret = io_recvmsg_copy_hdr(req, async_msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) req->flags |= REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654)
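/*
 * IORING_OP_RECVMSG: use the msghdr prepared in async_data if the request
 * was deferred, otherwise parse it now. If buffer selection is enabled the
 * chosen buffer replaces the single user iovec. -EAGAIN under
 * force_nonblock punts to async via io_setup_async_msg(); a short receive
 * with MSG_WAITALL, or truncation, fails the link.
 */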
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) struct io_async_msghdr iomsg, *kmsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) int ret, cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) kmsg = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) kmsg->msg.msg_name = &kmsg->addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) /* if iov is set, it's allocated already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) if (!kmsg->iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) kmsg->iov = kmsg->fast_iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) kmsg->msg.msg_iter.iov = kmsg->iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) ret = io_recvmsg_copy_hdr(req, &iomsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) kmsg = &iomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) kbuf = io_recv_buffer_select(req, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) return PTR_ERR(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->iov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) 1, req->sr_msg.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) min_ret = iov_iter_count(&kmsg->msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) kmsg->uaddr, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) return io_setup_async_msg(req, kmsg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) cflags = io_put_recv_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) if (kmsg->iov != kmsg->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) kfree(kmsg->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) __io_req_complete(req, ret, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) static int io_recv(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) struct io_buffer *kbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) struct io_sr_msg *sr = &req->sr_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) struct msghdr msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) void __user *buf = sr->buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) struct socket *sock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) int min_ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) int ret, cflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) sock = sock_from_file(req->file, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) if (unlikely(!sock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) if (req->flags & REQ_F_BUFFER_SELECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) kbuf = io_recv_buffer_select(req, !force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) if (IS_ERR(kbuf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) return PTR_ERR(kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) buf = u64_to_user_ptr(kbuf->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) msg.msg_name = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) msg.msg_control = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) msg.msg_controllen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) msg.msg_namelen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) msg.msg_iocb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) msg.msg_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) if (flags & MSG_DONTWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) else if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) flags |= MSG_DONTWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) if (flags & MSG_WAITALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) min_ret = iov_iter_count(&msg.msg_iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) ret = sock_recvmsg(sock, &msg, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) if (force_nonblock && ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) if (req->flags & REQ_F_BUFFER_SELECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) cflags = io_put_recv_kbuf(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) __io_req_complete(req, ret, cflags, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776)
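/*
 * Prepare an accept request: sqe->addr/addr2 carry the optional sockaddr
 * and length pointers, sqe->accept_flags the accept4() flags. The file
 * descriptor limit is snapshotted at prep time via rlimit(RLIMIT_NOFILE).
 */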
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) struct io_accept *accept = &req->accept;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) accept->flags = READ_ONCE(sqe->accept_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) accept->nofile = rlimit(RLIMIT_NOFILE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) static int io_accept(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) struct io_accept *accept = &req->accept;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) if (req->file->f_flags & O_NONBLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) req->flags |= REQ_F_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) ret = __sys_accept4_file(req->file, file_flags, accept->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) accept->addr_len, accept->flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) accept->nofile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) if (ret == -EAGAIN && force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) struct io_connect *conn = &req->connect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) struct io_async_connect *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) conn->addr_len = READ_ONCE(sqe->addr2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) if (!io)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) return move_addr_to_kernel(conn->addr, conn->addr_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) &io->address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837)
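/*
 * IORING_OP_CONNECT: the destination address is either already cached in
 * async_data (deferred request) or copied in now onto the stack. If the
 * nonblocking attempt returns -EAGAIN/-EINPROGRESS, the address is saved
 * into freshly allocated async_data and the request is retried from async
 * context.
 */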
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) static int io_connect(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) struct io_async_connect __io, *io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) unsigned file_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) if (req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) ret = move_addr_to_kernel(req->connect.addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) req->connect.addr_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) &__io.address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) io = &__io;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) file_flags = force_nonblock ? O_NONBLOCK : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) ret = __sys_connect_file(req->file, &io->address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) req->connect.addr_len, file_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) if (req->async_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) if (io_alloc_async_data(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) memcpy(req->async_data, &__io, sizeof(__io));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) if (ret == -ERESTARTSYS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) #else /* !CONFIG_NET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) static int io_send(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) static int io_recvmsg_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) static int io_recv(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) static int io_accept(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) static int io_connect(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) #endif /* CONFIG_NET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) struct io_poll_table {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) struct poll_table_struct pt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) int nr_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944)
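/*
 * Called from the waitqueue wakeup path: if the event matches, remove
 * the wait entry and queue task_work (func) to complete the request in
 * task context. Falls back to punting via io-wq if the task is exiting.
 */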
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) __poll_t mask, task_work_func_t func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) bool twa_signal_ok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) /* for instances that support it, check for an event match first: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) if (mask && !(mask & poll->events))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) req->result = mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) init_task_work(&req->task_work, func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) percpu_ref_get(&req->ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) * If we are using the signalfd wait_queue_head for this wakeup, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) * it's not safe to use TWA_SIGNAL as we could be recursing on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) * tsk->sighand->siglock when doing the wakeup. It should not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) * needed either, as the normal wakeup will suffice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) * If this fails, then the task is exiting. When a task exits, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) * work gets canceled, so just cancel this request as well instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) * of executing it. We can't safely execute it anyway, as we may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) * not have the state needed for it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) ret = io_req_task_work_add(req, twa_signal_ok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) tsk = io_wq_get_task(req->ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) task_work_add(tsk, &req->task_work, TWA_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) wake_up_process(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988)
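/*
 * Re-check for a pending event from task_work context; returns with
 * ->completion_lock held. Returns true if no event was pending and the
 * wait entry was re-armed, false if the request should be completed.
 */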
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) __acquires(&req->ctx->completion_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) if (!req->result && !READ_ONCE(poll->canceled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) struct poll_table_struct pt = { ._key = poll->events };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) req->result = vfs_poll(req->file, &pt) & poll->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) if (!req->result && !READ_ONCE(poll->canceled)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) add_wait_queue(poll->head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) /* pure poll stashes this in ->async_data, poll-driven retry keeps it in ->apoll */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) if (req->opcode == IORING_OP_POLL_ADD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) return req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) return req->apoll->double_poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) if (req->opcode == IORING_OP_POLL_ADD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) return &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) return &req->apoll->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023)
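/*
 * Detach the second (double) poll entry, if any, from its waitqueue and
 * drop the reference it held on the request. Caller must hold
 * ->completion_lock.
 */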
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) static void io_poll_remove_double(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) struct io_poll_iocb *poll = io_poll_get_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) lockdep_assert_held(&req->ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) if (poll && poll->head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) struct wait_queue_head *head = poll->head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) spin_lock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) if (poll->wait.private)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) refcount_dec(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) poll->head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) spin_unlock(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041)
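/*
 * Fill in the CQE for a completed poll request and commit the CQ ring.
 * Caller holds ->completion_lock and is responsible for the subsequent
 * io_cqring_ev_posted() notification.
 */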
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) req->poll.done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) io_cqring_fill_event(req, error ? error : mangle_poll(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051)
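/*
 * task_work for IORING_OP_POLL_ADD completions: re-check the poll state
 * and, if an event is pending, post the completion and submit any
 * linked request.
 */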
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) static void io_poll_task_func(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) if (io_poll_rewait(req, &req->poll)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) io_poll_complete(req, req->result, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) nxt = io_put_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) if (nxt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) __io_req_task_submit(nxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073)
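/*
 * Wakeup handler for the second (double) poll entry: on an event match,
 * detach this entry, detach the primary entry if it is still queued and
 * forward the wakeup to its wait function, then drop the reference held
 * by this entry.
 */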
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) struct io_poll_iocb *poll = io_poll_get_single(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) __poll_t mask = key_to_poll(key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) /* for instances that support it, check for an event match first: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) if (mask && !(mask & poll->events))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) list_del_init(&wait->entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) if (poll && poll->head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) bool done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) done = list_empty(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) if (!done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) /* make sure double remove sees this as being gone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) wait->private = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) if (!done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) /* use the wait func handler, so it matches the request type */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) poll->wait.func(&poll->wait, mode, sync, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) refcount_dec(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) wait_queue_func_t wake_func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) poll->head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) poll->done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) poll->canceled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) poll->events = events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) INIT_LIST_HEAD(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) init_waitqueue_func_entry(&poll->wait, wake_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116)
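/*
 * poll_table queueing callback: register the request on the waitqueue
 * handed to us by vfs_poll(). If the file uses a second waitqueue, a
 * separate io_poll_iocb is allocated and tracked via *poll_ptr.
 */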
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) struct io_poll_iocb **poll_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) struct io_kiocb *req = pt->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) * The file being polled uses multiple waitqueues for poll handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) * (e.g. one for read, one for write). Set up a separate io_poll_iocb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) * if this happens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) if (unlikely(pt->nr_entries)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) struct io_poll_iocb *poll_one = poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) /* already have a 2nd entry, fail a third attempt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) if (*poll_ptr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) pt->error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) /* double add on the same waitqueue head, ignore */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) if (poll->head == head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) if (!poll) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) pt->error = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) refcount_inc(&req->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) poll->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) *poll_ptr = poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) pt->nr_entries++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) poll->head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) if (poll->events & EPOLLEXCLUSIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) add_wait_queue_exclusive(head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) add_wait_queue(head, &poll->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) struct poll_table_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) struct async_poll *apoll = pt->req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167)
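/*
 * task_work for async armed poll: re-check the poll state, unhash the
 * request, then either resubmit it (the fast poll triggered) or cancel
 * it if the poll was canceled.
 */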
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) static void io_async_task_func(struct callback_head *cb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) struct async_poll *apoll = req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) trace_io_uring_task_run(req->ctx, req->opcode, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) if (io_poll_rewait(req, &apoll->poll)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) /* If req is still hashed, it cannot have been canceled. Don't check. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) if (hash_hashed(&req->hash_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) if (!READ_ONCE(apoll->poll.canceled))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) __io_req_task_submit(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) __io_req_task_cancel(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) struct io_poll_iocb *poll = &req->apoll->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) key_to_poll(key));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210)
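/*
 * Hash the request by user_data so poll cancelation can find it later.
 * Caller must hold ->completion_lock.
 */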
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) static void io_poll_req_insert(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) hlist_add_head(&req->hash_node, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219)
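/*
 * Arm a poll handler for the request: initialize the poll entry, call
 * vfs_poll() to queue it on the file's waitqueue(s) and return the mask
 * of events that are already pending. Returns with ->completion_lock
 * held; a non-zero return means the event is already available.
 */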
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) struct io_poll_iocb *poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) struct io_poll_table *ipt, __poll_t mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) wait_queue_func_t wake_func)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) __acquires(&ctx->completion_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) bool cancel = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) INIT_HLIST_NODE(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) io_init_poll_iocb(poll, mask, wake_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) poll->file = req->file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) poll->wait.private = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) ipt->pt._key = mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235) ipt->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) ipt->error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) ipt->nr_entries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) mask = vfs_poll(req->file, &ipt->pt) & poll->events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) if (unlikely(!ipt->nr_entries) && !ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) ipt->error = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) if (ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) if (likely(poll->head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) if (unlikely(list_empty(&poll->wait.entry))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) if (ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) cancel = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) ipt->error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) if (mask || ipt->error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) else if (cancel)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) else if (!poll->done) /* actually waiting for an event */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) io_poll_req_insert(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260) spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265)
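/*
 * Try to arm async poll instead of punting the request to io-wq.
 * Returns true if poll was armed and the request will be retried from
 * the wakeup, false if it must be handled by other means (not pollable,
 * allocation failure, or the event is already available).
 */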
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) static bool io_arm_poll_handler(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) const struct io_op_def *def = &io_op_defs[req->opcode];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) struct async_poll *apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) struct io_poll_table ipt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) __poll_t mask, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) int rw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) if (!req->file || !file_can_poll(req->file))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) if (req->flags & REQ_F_POLLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) if (def->pollin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280) rw = READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) else if (def->pollout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) rw = WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) /* if a nonblocking try isn't possible, no point in arming a poll handler */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) if (!io_file_supports_async(req->file, rw))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) if (unlikely(!apoll))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) apoll->double_poll = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) req->flags |= REQ_F_POLLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) req->apoll = apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) if (def->pollin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) mask |= POLLIN | POLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) if (def->pollout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) mask |= POLLOUT | POLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) if ((req->opcode == IORING_OP_RECVMSG) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) (req->sr_msg.msg_flags & MSG_ERRQUEUE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) mask &= ~POLLIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) mask |= POLLERR | POLLPRI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) ipt.pt._qproc = io_async_queue_proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) io_async_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) if (ret || ipt.error) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) trace_io_uring_poll_arm(ctx, req->opcode, req->user_data, mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) apoll->poll.events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326)
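/*
 * Detach a single poll entry from its waitqueue and unhash the request.
 * Returns true if the entry was still queued and a completion should be
 * posted. Caller must hold ->completion_lock.
 */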
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) static bool __io_poll_remove_one(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) struct io_poll_iocb *poll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) bool do_complete = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) spin_lock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) WRITE_ONCE(poll->canceled, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) if (!list_empty(&poll->wait.entry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) list_del_init(&poll->wait.entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) do_complete = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) spin_unlock(&poll->head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) hash_del(&req->hash_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) return do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) static bool io_poll_remove_one(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) bool do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) io_poll_remove_double(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) if (req->opcode == IORING_OP_POLL_ADD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) do_complete = __io_poll_remove_one(req, &req->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) struct async_poll *apoll = req->apoll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) /* non-poll requests still hold the submit ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) do_complete = __io_poll_remove_one(req, &apoll->poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) if (do_complete) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) kfree(apoll->double_poll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) kfree(apoll);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) if (do_complete) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) io_cqring_fill_event(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) io_commit_cqring(req->ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) return do_complete;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) * Returns true if we found and killed one or more poll requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) struct hlist_node *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) int posted = 0, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) list = &ctx->cancel_hash[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388) hlist_for_each_entry_safe(req, tmp, list, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) if (io_match_task(req, tsk, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390) posted += io_poll_remove_one(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) if (posted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) return posted != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400)
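/*
 * Cancel the poll request matching sqe_addr (its user_data). Returns 0
 * on success, -EALREADY if it was found but already completing, or
 * -ENOENT if no match exists. Caller must hold ->completion_lock.
 */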
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) struct hlist_head *list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) hlist_for_each_entry(req, list, hash_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) if (sqe_addr != req->user_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) if (io_poll_remove_one(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) return -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) static int io_poll_remove_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) sqe->poll_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427) req->poll.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432) * Find a running poll command matching the one specified in sqe->addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) * and remove it if found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) static int io_poll_remove(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) u64 addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) addr = req->poll.addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) ret = io_poll_cancel(ctx, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) struct io_kiocb *req = wait->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) struct poll_table_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469) static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471) struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) u32 events;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) events = READ_ONCE(sqe->poll32_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) #ifdef __BIG_ENDIAN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) events = swahw32(events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) (events & EPOLLEXCLUSIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487)
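/*
 * Arm an IORING_OP_POLL_ADD request. If the event is already pending,
 * complete it inline; otherwise leave it queued on the waitqueue until
 * io_poll_wake() fires.
 */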
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) static int io_poll_add(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) struct io_poll_iocb *poll = &req->poll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) struct io_poll_table ipt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) __poll_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) ipt.pt._qproc = io_poll_queue_proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) io_poll_wake);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) if (mask) { /* no async needed, the event already triggered */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) ipt.error = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) io_poll_complete(req, mask, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506) if (mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) return ipt.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512)
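/*
 * hrtimer callback for a timeout request: remove it from the timeout
 * list, post an -ETIME completion and drop the request's reference.
 */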
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) struct io_timeout_data *data = container_of(timer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) struct io_timeout_data, timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) struct io_kiocb *req = data->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) atomic_set(&req->ctx->cq_timeouts,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) atomic_read(&req->ctx->cq_timeouts) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) io_cqring_fill_event(req, -ETIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) return HRTIMER_NORESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535)
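/*
 * Try to cancel a queued timeout. Returns -EALREADY if the timer is
 * already firing, otherwise posts an -ECANCELED completion for it.
 * Caller must hold ->completion_lock.
 */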
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536) static int __io_timeout_cancel(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) struct io_timeout_data *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) ret = hrtimer_try_to_cancel(&io->timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) if (ret == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) return -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) list_del_init(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) io_cqring_fill_event(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551)
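/*
 * Find a queued timeout by user_data and cancel it; returns -ENOENT if
 * no match exists. Caller must hold ->completion_lock.
 */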
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) int ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) if (user_data == req->user_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) return __io_timeout_cancel(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) static int io_timeout_remove_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) req->timeout_rem.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) * Remove or update an existing timeout command
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) static int io_timeout_remove(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) ret = io_timeout_cancel(ctx, req->timeout_rem.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) io_cqring_fill_event(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605)
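/*
 * Prepare a timeout request: validate the SQE, allocate the async
 * timeout data and copy in the timespec, and select absolute or
 * relative hrtimer mode based on the timeout flags.
 */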
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) bool is_timeout_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) struct io_timeout_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) u32 off = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) if (sqe->ioprio || sqe->buf_index || sqe->len != 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) if (off && is_timeout_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) flags = READ_ONCE(sqe->timeout_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) if (flags & ~IORING_TIMEOUT_ABS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) req->timeout.off = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) if (!req->async_data && io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) data->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) if (flags & IORING_TIMEOUT_ABS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) data->mode = HRTIMER_MODE_ABS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) data->mode = HRTIMER_MODE_REL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) INIT_LIST_HEAD(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) static int io_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) struct io_timeout_data *data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) struct list_head *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) u32 tail, off = req->timeout.off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) * sqe->off holds how many events need to occur for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) * timeout event to be satisfied. If it isn't set, then this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657) * a pure timeout request and the sequence isn't used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) if (io_is_timeout_noseq(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) entry = ctx->timeout_list.prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) goto add;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) req->timeout.target_seq = tail + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) /* Update the last seq here in case io_flush_timeouts() hasn't.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) * This is safe because ->completion_lock is held, and submissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) * and completions are never mixed in the same ->completion_lock section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) ctx->cq_last_tm_flush = tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) * Insertion sort, ensuring the first entry in the list is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) * the one we need first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) list_for_each_prev(entry, &ctx->timeout_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) if (io_is_timeout_noseq(nxt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) /* nxt.seq is behind @tail, otherwise would've been completed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) if (off >= nxt->timeout.target_seq - tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) add:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) list_add(&req->timeout.list, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) data->timer.function = io_timeout_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) static bool io_cancel_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) return req->user_data == (unsigned long) data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701)
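/*
 * Ask io-wq to cancel the work item whose ->user_data matches @sqe_addr.
 * Translates the io-wq result into 0 (cancelled), -EALREADY (already
 * running) or -ENOENT (not found).
 */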
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) enum io_wq_cancel cancel_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) switch (cancel_ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) case IO_WQ_CANCEL_OK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) case IO_WQ_CANCEL_RUNNING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713) ret = -EALREADY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) case IO_WQ_CANCEL_NOTFOUND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722)
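/*
 * Try to cancel the request identified by @sqe_addr: first in io-wq, then
 * among pending timeouts, then among poll requests. The outcome (or
 * @success_ret if cancellation succeeded) is posted as @req's completion.
 */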
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) struct io_kiocb *req, __u64 sqe_addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) int success_ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) if (ret != -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) ret = io_timeout_cancel(ctx, sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) if (ret != -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) ret = io_poll_cancel(ctx, sqe_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) ret = success_ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) io_cqring_fill_event(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) io_commit_cqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) io_cqring_ev_posted(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) static int io_async_cancel_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) sqe->splice_fd_in)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) req->cancel.addr = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) static int io_async_cancel(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) io_async_find_and_cancel(ctx, req, req->cancel.addr, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) static int io_files_update_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782) if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) if (sqe->ioprio || sqe->rw_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) req->files_update.offset = READ_ONCE(sqe->off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788) req->files_update.nr_args = READ_ONCE(sqe->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) if (!req->files_update.nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) req->files_update.arg = READ_ONCE(sqe->addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) static int io_files_update(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) struct io_uring_files_update up;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) if (force_nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805) up.offset = req->files_update.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) up.fds = req->files_update.arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) ret = __io_sqe_files_update(ctx, &up, req->files_update.nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) __io_req_complete(req, ret, 0, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817)
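/*
 * Dispatch per-opcode preparation of a request from its SQE. Unhandled
 * opcodes warn once and fail with -EINVAL.
 */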
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) case IORING_OP_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) return io_read_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) return io_write_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) case IORING_OP_POLL_ADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) return io_poll_add_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) case IORING_OP_POLL_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) return io_poll_remove_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) case IORING_OP_FSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) return io_prep_fsync(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) case IORING_OP_SYNC_FILE_RANGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) return io_prep_sfr(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) case IORING_OP_SENDMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) case IORING_OP_SEND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) return io_sendmsg_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) return io_recvmsg_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) case IORING_OP_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) return io_connect_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) case IORING_OP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) return io_timeout_prep(req, sqe, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) case IORING_OP_TIMEOUT_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) return io_timeout_remove_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) case IORING_OP_ASYNC_CANCEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) return io_async_cancel_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) case IORING_OP_LINK_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) return io_timeout_prep(req, sqe, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) case IORING_OP_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856) return io_accept_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) case IORING_OP_FALLOCATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) return io_fallocate_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) return io_openat_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) case IORING_OP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) return io_close_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) case IORING_OP_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) return io_files_update_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) case IORING_OP_STATX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) return io_statx_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) case IORING_OP_FADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) return io_fadvise_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) case IORING_OP_MADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) return io_madvise_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) return io_openat2_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) case IORING_OP_EPOLL_CTL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) return io_epoll_ctl_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) return io_splice_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) case IORING_OP_PROVIDE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) return io_provide_buffers_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) case IORING_OP_REMOVE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) return io_remove_buffers_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) return io_tee_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) req->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) static int io_req_defer_prep(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) if (!sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) if (io_alloc_async_data(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) return io_req_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899)
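/*
 * Sequence number of this request within the ring: SQEs submitted so far
 * (not counting dropped ones), minus this request and any requests already
 * linked to it.
 */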
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) static u32 io_get_sequence(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) struct io_kiocb *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) u32 total_submitted, nr_reqs = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) if (req->flags & REQ_F_LINK_HEAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) list_for_each_entry(pos, &req->link_list, link_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) nr_reqs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) return total_submitted - nr_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913)
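/*
 * Handle IOSQE_IO_DRAIN: if this request must wait for earlier submissions
 * to complete, prepare it for async execution and park it on ->defer_list,
 * returning -EIOCBQUEUED (it is punted straight to io-wq if deferral turns
 * out to be unnecessary once ->completion_lock is taken). Returns 0 when
 * no deferral is needed at all.
 */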
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) struct io_defer_entry *de;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919) u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) 	/* Still need to defer if the defer list has pending requests. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) if (likely(list_empty_careful(&ctx->defer_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) !(req->flags & REQ_F_IO_DRAIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) seq = io_get_sequence(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) /* Still a chance to pass the sequence check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) io_prep_async_link(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) de = kmalloc(sizeof(*de), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) if (!de)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) return -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) trace_io_uring_defer(ctx, req, req->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) de->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) de->seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) list_add_tail(&de->list, &ctx->defer_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) return -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956)
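/*
 * Drop the files/nsproxy references tracked for an inflight request, take
 * it off ->inflight_list and clear REQ_F_INFLIGHT, waking the owning task
 * if it is idling while waiting for its requests to be cancelled.
 */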
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) static void io_req_drop_files(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) struct io_uring_task *tctx = req->task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) if (req->work.flags & IO_WQ_WORK_FILES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) put_files_struct(req->work.identity->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965) put_nsproxy(req->work.identity->nsproxy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) spin_lock_irqsave(&ctx->inflight_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) list_del(&req->inflight_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) spin_unlock_irqrestore(&ctx->inflight_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) req->flags &= ~REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) req->work.flags &= ~IO_WQ_WORK_FILES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) if (atomic_read(&tctx->in_idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) wake_up(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975)
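/*
 * Release per-opcode resources still attached to a request: selected
 * buffers, async iovecs and msghdrs, splice/tee input files, and openat
 * filenames.
 */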
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) static void __io_clean_op(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) if (req->flags & REQ_F_BUFFER_SELECTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) kfree((void *)(unsigned long)req->rw.addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) kfree(req->sr_msg.kbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) req->flags &= ~REQ_F_BUFFER_SELECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) if (req->flags & REQ_F_NEED_CLEANUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) case IORING_OP_WRITE: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) struct io_async_rw *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) 			kfree(io->free_iovec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) case IORING_OP_SENDMSG: {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) struct io_async_msghdr *io = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) if (io->iov != io->fast_iov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) kfree(io->iov);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) io_put_file(req, req->splice.file_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) (req->splice.flags & SPLICE_F_FD_IN_FIXED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) if (req->open.filename)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) putname(req->open.filename);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) req->flags &= ~REQ_F_NEED_CLEANUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027)
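/*
 * Issue a prepared request by dispatching on its opcode; unknown opcodes
 * fail with -EINVAL. On IOPOLL rings, successfully issued requests that
 * have a file are also registered for completion polling, taking
 * uring_lock when running from io-wq context.
 */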
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) switch (req->opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) case IORING_OP_NOP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) ret = io_nop(req, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) case IORING_OP_READV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) case IORING_OP_READ_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) case IORING_OP_READ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041) ret = io_read(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) case IORING_OP_WRITEV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) case IORING_OP_WRITE_FIXED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) case IORING_OP_WRITE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046) ret = io_write(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) case IORING_OP_FSYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) ret = io_fsync(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) case IORING_OP_POLL_ADD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) ret = io_poll_add(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) case IORING_OP_POLL_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) ret = io_poll_remove(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) case IORING_OP_SYNC_FILE_RANGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) ret = io_sync_file_range(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) case IORING_OP_SENDMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) ret = io_sendmsg(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) case IORING_OP_SEND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) ret = io_send(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) case IORING_OP_RECVMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) ret = io_recvmsg(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) case IORING_OP_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) ret = io_recv(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) case IORING_OP_TIMEOUT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) ret = io_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) case IORING_OP_TIMEOUT_REMOVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) ret = io_timeout_remove(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) case IORING_OP_ACCEPT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) ret = io_accept(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) case IORING_OP_CONNECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) ret = io_connect(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) case IORING_OP_ASYNC_CANCEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) ret = io_async_cancel(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) case IORING_OP_FALLOCATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) ret = io_fallocate(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) case IORING_OP_OPENAT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) ret = io_openat(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) case IORING_OP_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) ret = io_close(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) case IORING_OP_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) ret = io_files_update(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) case IORING_OP_STATX:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) ret = io_statx(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) case IORING_OP_FADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) ret = io_fadvise(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) case IORING_OP_MADVISE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) ret = io_madvise(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) case IORING_OP_OPENAT2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) ret = io_openat2(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) case IORING_OP_EPOLL_CTL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) ret = io_epoll_ctl(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114) case IORING_OP_SPLICE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) ret = io_splice(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) case IORING_OP_PROVIDE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) ret = io_provide_buffers(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) case IORING_OP_REMOVE_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) ret = io_remove_buffers(req, force_nonblock, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) case IORING_OP_TEE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) ret = io_tee(req, force_nonblock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) /* If the op doesn't have a file, we're not polling for it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) const bool in_async = io_wq_current_is_worker();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) /* workqueue context doesn't hold uring_lock, grab it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) if (in_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) io_iopoll_req_issued(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) if (in_async)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150)
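/*
 * io-wq work handler: arm any linked timeout, then issue the request
 * synchronously, retrying on -EAGAIN since polled IO may not be able to
 * wait for block-side request slots. Cancelled work (without
 * IO_WQ_WORK_NO_CANCEL) fails with -ECANCELED; a failed request is
 * completed here, under uring_lock for IOPOLL rings (see below).
 */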
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) struct io_kiocb *timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) timeout = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) if (timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) io_queue_linked_timeout(timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) /* if NO_CANCEL is set, we must still run the work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) IO_WQ_WORK_CANCEL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) ret = -ECANCELED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) ret = io_issue_sqe(req, false, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) * We can get EAGAIN for polled IO even though we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) * forcing a sync submission from here, since we can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) * wait for request slots on the block side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) if (ret != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) struct io_ring_ctx *lock_ctx = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) if (req->ctx->flags & IORING_SETUP_IOPOLL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) lock_ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) 		 * io_iopoll_complete() does not hold completion_lock to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) 		 * complete polled io, so for polled io we cannot call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) 		 * io_req_complete() directly here; otherwise there may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) 		 * concurrent access to the cqring, defer_list, etc., which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) 		 * is not safe. Given that io_iopoll_complete() is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) 		 * called under uring_lock, we also take uring_lock here to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) 		 * complete polled io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) if (lock_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) mutex_lock(&lock_ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) if (lock_ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203) mutex_unlock(&lock_ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) return io_steal_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) table = &ctx->file_data->table[index >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) return table->files[index & IORING_FILE_TABLE_MASK];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217)
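/*
 * Resolve the file a request operates on: either a registered (fixed) file
 * looked up by index, pinning the current fixed-file node, or a regular fd
 * lookup through the submit state. Requests operating on an io_uring file
 * are additionally tracked on ->inflight_list for cancellation.
 */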
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) static struct file *io_file_get(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) struct io_kiocb *req, int fd, bool fixed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) if (fixed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) if (unlikely((unsigned int)fd >= ctx->nr_user_files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) fd = array_index_nospec(fd, ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) file = io_file_from_index(ctx, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) if (file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) req->fixed_file_refs = &ctx->file_data->node->refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) percpu_ref_get(req->fixed_file_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) trace_io_uring_file_get(ctx, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) file = __io_file_get(state, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) if (file && file->f_op == &io_uring_fops &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) !(req->flags & REQ_F_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) req->flags |= REQ_F_INFLIGHT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) list_add(&req->inflight_entry, &ctx->inflight_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) bool fixed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) if (unlikely(!fixed && io_async_submit(req->ctx)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) req->file = io_file_get(state, req, fd, fixed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) if (req->file || io_op_defs[req->opcode].needs_file_no_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265)
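/*
 * hrtimer callback for a linked timeout: if the request it is linked to is
 * still pending, try to cancel it; the timeout itself then completes with
 * -ETIME (or the cancellation error).
 */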
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) struct io_timeout_data *data = container_of(timer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) struct io_timeout_data, timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) struct io_kiocb *req = data->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) struct io_kiocb *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) 	 * We don't expect the list to be empty; that will only happen if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) * race with the completion of the linked work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) if (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) prev = list_entry(req->link_list.prev, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) list_del_init(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) if (!refcount_inc_not_zero(&prev->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) list_del(&req->timeout.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) if (prev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) io_put_req_deferred(prev, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) io_cqring_add_event(req, -ETIME, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) io_put_req_deferred(req, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) return HRTIMER_NORESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) static void __io_queue_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) * If the list is now empty, then our linked request finished before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) 	 * we got a chance to set up the timer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) if (!list_empty(&req->link_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) struct io_timeout_data *data = req->async_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) data->timer.function = io_link_timeout_fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) data->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) static void io_queue_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) __io_queue_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) /* drop submission reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328)
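/*
 * If @req heads a link whose first chained request is IORING_OP_LINK_TIMEOUT,
 * flag both requests accordingly and return the timeout so the caller can
 * arm it once @req has been issued.
 */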
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) struct io_kiocb *nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) if (!(req->flags & REQ_F_LINK_HEAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) if (req->flags & REQ_F_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) nxt->flags |= REQ_F_LTIMEOUT_ACTIVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) req->flags |= REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) return nxt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347)
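/*
 * Issue a request inline, overriding credentials if the request carries its
 * own. On -EAGAIN the request is punted to io-wq unless a poll handler can
 * be armed instead; on success the next request in the link, if any, is
 * issued as well; on any other error the request is failed and completed.
 * A linked timeout, if present, is armed on the success and -EAGAIN paths.
 */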
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) static void __io_queue_sqe(struct io_kiocb *req, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) struct io_kiocb *linked_timeout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) const struct cred *old_creds = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) linked_timeout = io_prep_linked_timeout(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) if ((req->flags & REQ_F_WORK_INITIALIZED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) (req->work.flags & IO_WQ_WORK_CREDS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) req->work.identity->creds != current_cred()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) if (old_creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) revert_creds(old_creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) if (old_creds == req->work.identity->creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) old_creds = NULL; /* restored original creds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365) old_creds = override_creds(req->work.identity->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) ret = io_issue_sqe(req, true, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) * We async punt it if the file wasn't marked NOWAIT, or if the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) * doesn't support non-blocking read/write attempts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) if (!io_arm_poll_handler(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) * Queued up for async execution, worker will release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) * submit reference when the iocb is actually submitted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) if (linked_timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) io_queue_linked_timeout(linked_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) } else if (likely(!ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) /* drop submission reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) req = io_put_req_find_next(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) if (linked_timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) io_queue_linked_timeout(linked_timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) if (req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) if (!(req->flags & REQ_F_FORCE_ASYNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) /* un-prep timeout, so it'll be killed as any other linked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) req->flags &= ~REQ_F_LINK_TIMEOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) if (old_creds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) revert_creds(old_creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407)
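/*
 * Queue a freshly submitted request: defer it if IOSQE_IO_DRAIN semantics
 * require that, punt it to io-wq if IOSQE_ASYNC was requested, otherwise
 * finish preparation (if an SQE was passed in) and issue it inline.
 */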
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) ret = io_req_defer(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) if (ret != -EIOCBQUEUED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) fail_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) req_set_fail_links(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) io_req_complete(req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) } else if (req->flags & REQ_F_FORCE_ASYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) if (!req->async_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) io_queue_async_work(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) if (sqe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) ret = io_req_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) __io_queue_sqe(req, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) static inline void io_queue_link_head(struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) io_req_complete(req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) io_queue_sqe(req, NULL, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447)
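/*
 * Submit one SQE: append it to the link currently being built (queueing the
 * whole chain once the last linked request is seen), start a new link, or
 * queue it directly. IOSQE_IO_DRAIN inside a link is propagated to the head
 * and, via ->drain_next, to the request that follows the link.
 */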
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) struct io_kiocb **link, struct io_comp_state *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) * If we already have a head request, queue this one for async
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) 	 * submission once the head completes. If we don't have a head but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) * submitted sync once the chain is complete. If none of those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) * conditions are true (normal request), then just queue it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) if (*link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) struct io_kiocb *head = *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) * Because a link executes sequentially, draining both sides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) * of the link also fulfils IOSQE_IO_DRAIN semantics for all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) * requests in the link. So, drain the head as well as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) * request that follows the link. The latter is handled via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) * drain_next flag to persist the effect across calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) if (req->flags & REQ_F_IO_DRAIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) head->flags |= REQ_F_IO_DRAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) ctx->drain_next = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) /* fail even hard links since we don't submit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) head->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) trace_io_uring_link(ctx, req, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) list_add_tail(&req->link_list, &head->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) /* last request of a link, enqueue the link */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) io_queue_link_head(head, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) *link = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) if (unlikely(ctx->drain_next)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) req->flags |= REQ_F_IO_DRAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) ctx->drain_next = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) req->flags |= REQ_F_LINK_HEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) INIT_LIST_HEAD(&req->link_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) ret = io_req_defer_prep(req, sqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) req->flags |= REQ_F_FAIL_LINK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) *link = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) io_queue_sqe(req, sqe, cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) }
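/*
 * Worked example of the linking rules above, with a hypothetical batch
 * of three SQEs where the first two set IOSQE_IO_LINK and the third
 * does not: the first starts a new head (*link = req), the second is
 * appended to head->link_list, and the third is appended as well and,
 * having no link flag, closes the chain and queues it via
 * io_queue_link_head(), resetting *link to NULL.
 */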
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) * Batched submission is done, ensure local IO is flushed out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) static void io_submit_state_end(struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) if (!list_empty(&state->comp.list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) io_submit_flush_completions(&state->comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) blk_finish_plug(&state->plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) io_state_file_put(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) if (state->free_reqs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) * Start submission side cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) static void io_submit_state_start(struct io_submit_state *state,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) struct io_ring_ctx *ctx, unsigned int max_ios)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) blk_start_plug(&state->plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) state->comp.nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) INIT_LIST_HEAD(&state->comp.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) state->comp.ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) state->free_reqs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) state->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535) state->ios_left = max_ios;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) static void io_commit_sqring(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) * Ensure any loads from the SQEs are done at this point,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) * since once we write the new head, the application could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) * write new data to them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) smp_store_release(&rings->sq.head, ctx->cached_sq_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) }
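/*
 * Illustration with hypothetical numbers: if this submit batch started
 * with the shared and cached head both at 5 and three SQEs were
 * consumed via io_consume_sqe(), ctx->cached_sq_head is now 8 and the
 * single release store above publishes head == 8, freeing all three
 * SQE slots for reuse by the application at once.
 */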
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) * Fetch an sqe, if one is available. Note that the returned sqe will point to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) * memory that is mapped by userspace. This means that care needs to be taken to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) * ensure that reads are stable, as we cannot rely on userspace always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) * being a good citizen. If members of the sqe are validated and then later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) * used, it's important that those reads are done through READ_ONCE() to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) * prevent a re-load down the line.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560) u32 *sq_array = ctx->sq_array;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) unsigned head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564) * The cached sq head (or cq tail) serves two purposes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) * 1) allows us to batch the cost of updating the user visible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) * head.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) * 2) allows the kernel side to track the head on its own, even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) * though the application is the one updating it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) if (likely(head < ctx->sq_entries))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) return &ctx->sq_sqes[head];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) /* drop invalid entries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) ctx->cached_sq_dropped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) }
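/*
 * Worked example with hypothetical values: for a ring with 8 SQ entries
 * (sq_mask == 7) and cached_sq_head == 10, the code above reads
 * sq_array[10 & 7] == sq_array[2]. If the application stored 3 there,
 * &ctx->sq_sqes[3] is returned; if it stored an out-of-range value such
 * as 9, the entry is counted in sq_dropped and NULL is returned.
 */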
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) static inline void io_consume_sqe(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) ctx->cached_sq_head++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) * Check SQE restrictions (opcode and flags).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) * Returns 'true' if SQE is allowed, 'false' otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) static inline bool io_check_restriction(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) unsigned int sqe_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) if (!ctx->restricted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) ctx->restrictions.sqe_flags_required)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) ctx->restrictions.sqe_flags_required))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) }
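/*
 * Example with hypothetical restriction values, assuming the opcode
 * itself is permitted in restrictions.sqe_op: with sqe_flags_required
 * == IOSQE_FIXED_FILE and sqe_flags_allowed == IOSQE_ASYNC, an SQE
 * carrying IOSQE_FIXED_FILE | IOSQE_ASYNC passes both flag checks,
 * one carrying only IOSQE_ASYNC fails the "required" check, and one
 * adding IOSQE_IO_LINK fails the "allowed" check.
 */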
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) IOSQE_BUFFER_SELECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615)
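/*
 * Set up a freshly allocated request from its SQE: fill in the basic
 * fields, take the two references (one dropped after submission, one at
 * completion), validate the SQE flags against SQE_VALID_FLAGS and any
 * registered restrictions, adopt an alternative personality if one was
 * requested, and resolve the target file for opcodes that need one.
 */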
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) const struct io_uring_sqe *sqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) struct io_submit_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) unsigned int sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) int id, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) req->opcode = READ_ONCE(sqe->opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) req->user_data = READ_ONCE(sqe->user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) req->async_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) req->file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) req->ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) req->flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) /* one is dropped after submission, the other at completion */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) refcount_set(&req->refs, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) req->task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) req->result = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) if (unlikely(req->opcode >= IORING_OP_LAST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) if (unlikely(io_sq_thread_acquire_mm(ctx, req)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) sqe_flags = READ_ONCE(sqe->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) /* enforce forwards compatibility on users */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) !io_op_defs[req->opcode].buffer_select)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) id = READ_ONCE(sqe->personality);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) if (id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) iod = xa_load(&ctx->personalities, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) if (unlikely(!iod))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) refcount_inc(&iod->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) __io_req_init_async(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) get_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) req->work.identity = iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) req->work.flags |= IO_WQ_WORK_CREDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) /* same numerical values as the corresponding REQ_F_* flags, safe to copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) req->flags |= sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) if (!io_op_defs[req->opcode].needs_file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) ret = io_req_set_file(state, req, READ_ONCE(sqe->fd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) state->ios_left--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677)
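/*
 * Submit up to @nr SQEs from the SQ ring. Returns the number of SQEs
 * consumed (each will complete on its own, possibly after being punted
 * to async context), -EBUSY if the CQ overflow backlog could not be
 * flushed, or -EAGAIN if nothing could be started.
 */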
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) struct io_submit_state state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) struct io_kiocb *link = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) int i, submitted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) /* if we have a backlog and couldn't flush it all, return BUSY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) if (test_bit(0, &ctx->sq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) /* make sure SQ entry isn't read before tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) if (!percpu_ref_tryget_many(&ctx->refs, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) percpu_counter_add(&current->io_uring->inflight, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) refcount_add(nr, &current->usage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) io_submit_state_start(&state, ctx, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) const struct io_uring_sqe *sqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) sqe = io_get_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) if (unlikely(!sqe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) io_consume_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) req = io_alloc_req(ctx, &state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) if (unlikely(!req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713) if (!submitted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) submitted = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) io_consume_sqe(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) /* will complete beyond this point, count as submitted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) submitted++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) err = io_init_req(ctx, req, sqe, &state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) if (unlikely(err)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723) fail_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) io_req_complete(req, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730) true, io_async_submit(ctx));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) err = io_submit_sqe(req, sqe, &link, &state.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) goto fail_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) if (unlikely(submitted != nr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739) int unused = nr - ref_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) percpu_ref_put_many(&ctx->refs, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) percpu_counter_sub(&tctx->inflight, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743) put_task_struct_many(current, unused);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) if (link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) io_queue_link_head(link, &state.comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747) io_submit_state_end(&state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) /* Commit SQ ring head once we've consumed and submitted all SQEs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) io_commit_sqring(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) return submitted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) }
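/*
 * Reference accounting example with hypothetical numbers: if nr == 4
 * refs were taken up front but only 2 SQEs were actually consumed, the
 * cleanup above returns the 2 unused ctx refs, inflight counts and task
 * refs, so only requests that will really complete keep references.
 */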
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) /* Tell userspace we may need a wakeup call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759) ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769)
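/*
 * Wake callback used while the SQPOLL thread sleeps on the sqd
 * waitqueue: on a successful wakeup it also clears
 * IORING_SQ_NEED_WAKEUP, as the thread is about to run again and
 * userspace wakeups are no longer required.
 */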
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770) static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) int sync, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) struct io_ring_ctx *ctx = container_of(wqe, struct io_ring_ctx, sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) ret = autoremove_wake_function(wqe, mode, sync, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786)
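/*
 * Per-iteration result of __io_sq_thread(), OR'ed together across all
 * rings served by one thread: SQT_DID_WORK means the submission path
 * was run, SQT_SPIN roughly means "keep busy-looping" (still within the
 * idle window, or iopoll work is pending), and SQT_IDLE means the ring
 * has been idle long enough that the thread may sleep.
 */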
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) enum sq_ret {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) SQT_IDLE = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) SQT_SPIN = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) SQT_DID_WORK = 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793) static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) unsigned long start_jiffies, bool cap_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) unsigned long timeout = start_jiffies + ctx->sq_thread_idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) unsigned int to_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) if (!list_empty(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803) unsigned nr_events = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) if (!list_empty(&ctx->iopoll_list) && !need_resched())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) io_do_iopoll(ctx, &nr_events, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) to_submit = io_sqring_entries(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) * If submit got -EBUSY, flag us as needing the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) * to enter the kernel to reap and flush events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817) if (!to_submit || ret == -EBUSY || need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) * Drop cur_mm before scheduling; we can't hold it for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820) * long periods (or across schedule()). Do this before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) * adding ourselves to the waitqueue, as the unuse/drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) * may sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824) io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827) * We're polling. If we're within the defined idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) * period, then let us spin without work before going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) * to sleep. The exception is if we got -EBUSY while doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) * more IO; in that case, wait for the application to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) * reap events and wake us up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) if (!list_empty(&ctx->iopoll_list) || need_resched() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) (!time_after(jiffies, timeout) && ret != -EBUSY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) !percpu_ref_is_dying(&ctx->refs)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) return SQT_SPIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838) prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) * While doing polled IO, before going to sleep we need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) * to check whether new reqs have been added to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) * iopoll_list: reqs may have been punted to an io worker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845) * and only get added to the iopoll_list later, so check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) * the iopoll_list again here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848) if ((ctx->flags & IORING_SETUP_IOPOLL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) !list_empty_careful(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850) finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) to_submit = io_sqring_entries(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) if (!to_submit || ret == -EBUSY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856) return SQT_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) io_ring_clear_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) /* if we're handling multiple rings, cap submit size for fairness */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) if (cap_entries && to_submit > 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) to_submit = 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) if (likely(!percpu_ref_is_dying(&ctx->refs) && !ctx->sqo_dead))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) ret = io_submit_sqes(ctx, to_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) wake_up(&ctx->sqo_sq_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) return SQT_DID_WORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) static void io_sqd_init_new(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) while (!list_empty(&sqd->ctx_new_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) init_wait(&ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) ctx->sqo_wait_entry.func = io_sq_wake_function;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) list_move_tail(&ctx->sqd_list, &sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886) complete(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) static int io_sq_thread(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) struct cgroup_subsys_state *cur_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) const struct cred *old_cred = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894) struct io_sq_data *sqd = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) unsigned long start_jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) start_jiffies = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) enum sq_ret ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901) bool cap_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) * Any changes to the sqd lists are synchronized through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905) * kthread parking. This synchronizes the thread against users,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) * while the users themselves are synchronized on sqd->ctx_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908) if (kthread_should_park()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) kthread_parkme();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) * When the sq thread is unparked, the previous park operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) * may have come from io_put_sq_data(), which means the sq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) * thread is about to be stopped, so check for that here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915) if (kthread_should_stop())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) if (unlikely(!list_empty(&sqd->ctx_new_list)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920) io_sqd_init_new(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) cap_entries = !list_is_singular(&sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) if (current->cred != ctx->creds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926) if (old_cred)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) revert_creds(old_cred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) old_cred = override_creds(ctx->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930) io_sq_thread_associate_blkcg(ctx, &cur_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932) current->loginuid = ctx->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933) current->sessionid = ctx->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936) ret |= __io_sq_thread(ctx, start_jiffies, cap_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941) if (ret & SQT_SPIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945) } else if (ret == SQT_IDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) if (kthread_should_park())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949) io_ring_set_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951) start_jiffies = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) io_ring_clear_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) io_sq_thread_drop_mm();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) if (cur_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961) io_sq_thread_unassociate_blkcg();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962) if (old_cred)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) revert_creds(old_cred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) kthread_parkme();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) struct io_wait_queue {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971) struct wait_queue_entry wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) unsigned to_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) unsigned nr_timeouts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) static inline bool io_should_wake(struct io_wait_queue *iowq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979) struct io_ring_ctx *ctx = iowq->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982) * Wake up if we have enough events, or if a timeout occurred since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) * started waiting. For timeouts, we always want to return to userspace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984) * regardless of event count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) return io_cqring_events(ctx) >= iowq->to_wait ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987) atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991) int wake_flags, void *key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994) wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) * We cannot safely flush overflowed CQEs from here, so just ensure we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) * wake up the task; the next invocation will do the flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000) if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) return autoremove_wake_function(curr, mode, wake_flags, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004)
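/*
 * Run pending task_work and fold in the signal state: returns > 0 if
 * task_work was run (or only the JOBCTL_TASK_WORK flag needed
 * clearing), 0 if nothing is pending, and -EINTR if a real signal is
 * pending and the caller should bail out to userspace.
 */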
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) static int io_run_task_work_sig(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) if (io_run_task_work())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) if (!signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) if (current->jobctl & JOBCTL_TASK_WORK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012) spin_lock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) current->jobctl &= ~JOBCTL_TASK_WORK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014) recalc_sigpending();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) spin_unlock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022) * Wait until events become available, if we don't already have some. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023) * application must reap them itself, as they reside on the shared cq ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) const sigset_t __user *sig, size_t sigsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028) struct io_wait_queue iowq = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029) .wq = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) .private = current,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031) .func = io_wake_function,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) .entry = LIST_HEAD_INIT(iowq.wq.entry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034) .ctx = ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035) .to_wait = min_events,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) struct io_rings *rings = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042) if (io_cqring_events(ctx) >= min_events)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7044) if (!io_run_task_work())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7045) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7046) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7048) if (sig) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7049) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7050) if (in_compat_syscall())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7051) ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) ret = set_user_sigmask(sig, sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062) trace_io_uring_cqring_wait(ctx, min_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065) prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) /* make sure we run task_work before checking for signals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070) finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) } else if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075) if (io_should_wake(&iowq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077) if (test_bit(0, &ctx->cq_check_overflow)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078) finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) finish_wait(&ctx->wait, &iowq.wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085) restore_saved_sigmask_unless(ret == -EINTR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086)
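/*
 * If completions are already available on the CQ ring, report success
 * even if we were interrupted; userspace can reap them first and deal
 * with the signal afterwards.
 */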
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087) return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090) static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093) if (ctx->ring_sock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094) struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098) kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) for (i = 0; i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) file = io_file_from_index(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) static void io_file_ref_kill(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115) struct fixed_file_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117) data = container_of(ref, struct fixed_file_data, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) complete(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) static void io_sqe_files_set_node(struct fixed_file_data *file_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) spin_lock_bh(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125) file_data->node = ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) list_add_tail(&ref_node->node, &file_data->ref_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) spin_unlock_bh(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128) percpu_ref_get(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130)
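/*
 * Tear down the registered file set: kill the current ref node and the
 * file data refs, then wait for all outstanding node refs to drain.
 * Because the wait is interruptible, a pre-allocated backup node lets
 * us resurrect the refs and restore a valid node if a signal arrives
 * before teardown completes.
 */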
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131) static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133) struct fixed_file_data *data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7134) struct fixed_file_ref_node *backup_node, *ref_node = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7135) unsigned nr_tables, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7136) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) backup_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141) if (!backup_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) spin_lock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145) ref_node = data->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146) spin_unlock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) if (ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) percpu_ref_kill(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) percpu_ref_kill(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) /* wait for all ref nodes to complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) flush_delayed_work(&ctx->file_put_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) ret = wait_for_completion_interruptible(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160) percpu_ref_resurrect(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) reinit_completion(&data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) io_sqe_files_set_node(data, backup_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167) __io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168) nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) for (i = 0; i < nr_tables; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) kfree(data->table[i].files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171) kfree(data->table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) percpu_ref_exit(&data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) kfree(data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174) ctx->file_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) ctx->nr_user_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) destroy_fixed_file_ref_node(backup_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) static void io_put_sq_data(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) if (refcount_dec_and_test(&sqd->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184) * The park is a bit of a work-around; without it we get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185) * a spew of warnings on shutdown with SQPOLL set and affinity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) * set to a single CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) kthread_park(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190) kthread_stop(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) kfree(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197) static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7199) struct io_ring_ctx *ctx_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) f = fdget(p->wq_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) return ERR_PTR(-ENXIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206) if (f.file->f_op != &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) ctx_attach = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212) sqd = ctx_attach->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) if (!sqd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) refcount_inc(&sqd->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7219) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7220) return sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7223) static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7225) struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7227) if (p->flags & IORING_SETUP_ATTACH_WQ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7228) return io_attach_sq_data(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7230) sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7231) if (!sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7232) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7234) refcount_set(&sqd->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7235) INIT_LIST_HEAD(&sqd->ctx_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7236) INIT_LIST_HEAD(&sqd->ctx_new_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7237) mutex_init(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7238) mutex_init(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7239) init_waitqueue_head(&sqd->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7240) return sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7242)
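/*
 * io_sq_thread_park()/io_sq_thread_unpark() bracket sections that
 * change sqd state from outside the SQPOLL thread: park takes sqd->lock
 * and brings the kthread to a known parked point, unpark restarts it
 * and drops the lock. Both are no-ops if no thread has been created.
 */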
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7243) static void io_sq_thread_unpark(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7244) __releases(&sqd->lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7245) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7246) if (!sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7247) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7248) kthread_unpark(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7249) mutex_unlock(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7252) static void io_sq_thread_park(struct io_sq_data *sqd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7253) __acquires(&sqd->lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7255) if (!sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7256) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7257) mutex_lock(&sqd->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7258) kthread_park(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7261) static void io_sq_thread_stop(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7263) struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7265) if (sqd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7266) if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7267) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7268) * We may arrive here from the error branch in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7269) * io_sq_offload_create() where the kthread was created
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7270) * but never woken up; wake it up now to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7271) * the wait_for_completion() below can finish.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7272) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7273) wake_up_process(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7274) wait_for_completion(&ctx->sq_thread_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7276) io_sq_thread_park(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7279) mutex_lock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7280) list_del(&ctx->sqd_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7281) mutex_unlock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7283) if (sqd->thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7284) finish_wait(&sqd->wait, &ctx->sqo_wait_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7285) io_sq_thread_unpark(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7288) io_put_sq_data(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7289) ctx->sq_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7292)
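/* Tear down the async backends: the SQPOLL thread state and the io-wq pool. */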
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7293) static void io_finish_async(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7294) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7295) io_sq_thread_stop(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7297) if (ctx->io_wq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7298) io_wq_destroy(ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7299) ctx->io_wq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7303) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7305) * Ensure the UNIX gc is aware of our file set, so we are certain that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7306) * the io_uring can be safely unregistered on process exit, even if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7307) * reference cycles among the registered files.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7308) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7309) static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7311) struct sock *sk = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7312) struct scm_fp_list *fpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7313) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7314) int i, nr_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7316) fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7317) if (!fpl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7318) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7320) skb = alloc_skb(0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7321) if (!skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7322) kfree(fpl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7323) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7326) skb->sk = sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7328) nr_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7329) fpl->user = get_uid(ctx->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7330) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7331) struct file *file = io_file_from_index(ctx, i + offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7333) if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7334) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7335) fpl->fp[nr_files] = get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7336) unix_inflight(fpl->user, fpl->fp[nr_files]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7337) nr_files++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7340) if (nr_files) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7341) fpl->max = SCM_MAX_FD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7342) fpl->count = nr_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7343) UNIXCB(skb).fp = fpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7344) skb->destructor = unix_destruct_scm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7345) refcount_add(skb->truesize, &sk->sk_wmem_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7346) skb_queue_head(&sk->sk_receive_queue, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7348) for (i = 0; i < nr_files; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7349) fput(fpl->fp[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7350) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7351) kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7352) free_uid(fpl->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7353) kfree(fpl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7356) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7360) * If UNIX sockets are enabled, fd passing can cause a reference cycle which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7361) * causes regular reference counting to break down. We rely on the UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7362) * garbage collection to take care of this problem for us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7364) static int io_sqe_files_scm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7366) unsigned left, total;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7367) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7369) total = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7370) left = ctx->nr_user_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7371) while (left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7372) unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7374) ret = __io_sqe_files_scm(ctx, this_files, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7375) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7376) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7377) left -= this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7378) total += this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7381) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7382) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7384) while (total < ctx->nr_user_files) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7385) struct file *file = io_file_from_index(ctx, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7387) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7388) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7389) total++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7392) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7394) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7395) static int io_sqe_files_scm(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7397) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7399) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7400)
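/*
 * Allocate the second level of the fixed file table: nr_tables chunks of up
 * to IORING_MAX_FILES_TABLE file pointers each. On failure everything
 * allocated so far is freed and non-zero is returned.
 */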
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7401) static int io_sqe_alloc_file_tables(struct fixed_file_data *file_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7402) unsigned nr_tables, unsigned nr_files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7404) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7406) for (i = 0; i < nr_tables; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7407) struct fixed_file_table *table = &file_data->table[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7408) unsigned this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7410) this_files = min(nr_files, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7411) table->files = kcalloc(this_files, sizeof(struct file *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7412) GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7413) if (!table->files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7414) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7415) nr_files -= this_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7418) if (i == nr_tables)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7419) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7421) for (i = 0; i < nr_tables; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7422) struct fixed_file_table *table = &file_data->table[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7423) kfree(table->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7425) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7427)
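/*
 * Drop one registered file. With CONFIG_UNIX the file also lives in an
 * SCM_RIGHTS skb on the ring socket, so it must be taken out of UNIX gc
 * inflight accounting and removed from that skb (freeing the skb if it
 * becomes empty) before the reference is dropped.
 */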
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7428) static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7430) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7431) struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7432) struct sk_buff_head list, *head = &sock->sk_receive_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7433) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7434) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7436) __skb_queue_head_init(&list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7438) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7439) * Find the skb that holds this file in its SCM_RIGHTS. When found,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7440) * remove this entry and rearrange the file array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7441) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7442) skb = skb_dequeue(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7443) while (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7444) struct scm_fp_list *fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7446) fp = UNIXCB(skb).fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7447) for (i = 0; i < fp->count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7448) int left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7450) if (fp->fp[i] != file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7451) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7453) unix_notinflight(fp->user, fp->fp[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7454) left = fp->count - 1 - i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7455) if (left) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7456) memmove(&fp->fp[i], &fp->fp[i + 1],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7457) left * sizeof(struct file *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7459) fp->count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7460) if (!fp->count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7461) kfree_skb(skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7462) skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7463) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7464) __skb_queue_tail(&list, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7466) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7467) file = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7468) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7471) if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7472) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7474) __skb_queue_tail(&list, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7476) skb = skb_dequeue(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7479) if (skb_peek(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7480) spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7481) while ((skb = __skb_dequeue(&list)) != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7482) __skb_queue_tail(head, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7483) spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7484) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7485) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7486) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7487) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7490) struct io_file_put {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7491) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7492) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7493) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7494)
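/*
 * Release every file queued on a dead ref node (via io_queue_file_removal()),
 * then free the node itself and drop its reference on the parent file_data.
 */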
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7495) static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7497) struct fixed_file_data *file_data = ref_node->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7498) struct io_ring_ctx *ctx = file_data->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7499) struct io_file_put *pfile, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7501) list_for_each_entry_safe(pfile, tmp, &ref_node->file_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7502) list_del(&pfile->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7503) io_ring_file_put(ctx, pfile->file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7504) kfree(pfile);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7507) percpu_ref_exit(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7508) kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7509) percpu_ref_put(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7511)
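/* Delayed-work handler: drain ctx->file_put_llist and process each dead ref node. */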
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7512) static void io_file_put_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7514) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7515) struct llist_node *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7517) ctx = container_of(work, struct io_ring_ctx, file_put_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7518) node = llist_del_all(&ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7520) while (node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7521) struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7522) struct llist_node *next = node->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7524) ref_node = llist_entry(node, struct fixed_file_ref_node, llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7525) __io_file_put_work(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7526) node = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7529)
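/*
 * percpu_ref release callback for a fixed file ref node. Mark this node done
 * and move the leading run of completed nodes (kept in registration order) to
 * the put list; run the put work immediately if the whole file table is being
 * torn down, otherwise schedule it with a delay on the first addition.
 */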
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7530) static void io_file_data_ref_zero(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7532) struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7533) struct fixed_file_data *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7534) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7535) bool first_add = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7536) int delay = HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7538) ref_node = container_of(ref, struct fixed_file_ref_node, refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7539) data = ref_node->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7540) ctx = data->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7542) spin_lock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7543) ref_node->done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7545) while (!list_empty(&data->ref_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7546) ref_node = list_first_entry(&data->ref_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7547) struct fixed_file_ref_node, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7548) /* recycle ref nodes in order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7549) if (!ref_node->done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7550) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7551) list_del(&ref_node->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7552) first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7554) spin_unlock_bh(&data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7556) if (percpu_ref_is_dying(&data->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7557) delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7559) if (!delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7560) mod_delayed_work(system_wq, &ctx->file_put_work, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7561) else if (first_add)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7562) queue_delayed_work(system_wq, &ctx->file_put_work, delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7564)
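/*
 * Allocate a ref node for the current generation of the fixed file table;
 * its percpu ref hitting zero triggers io_file_data_ref_zero() above.
 */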
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7565) static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7566) struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7567) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7568) struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7570) ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7571) if (!ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7572) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7574) if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7575) 0, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7576) kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7577) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7579) INIT_LIST_HEAD(&ref_node->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7580) INIT_LIST_HEAD(&ref_node->file_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7581) ref_node->file_data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7582) ref_node->done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7583) return ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7586) static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7588) percpu_ref_exit(&ref_node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7589) kfree(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7591)
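/*
 * IORING_REGISTER_FILES: turn an array of descriptors into the ring's fixed
 * file set. Limits are checked, the two-level table is allocated, each fd is
 * looked up and installed (-1 leaves a sparse slot, io_uring fds are
 * rejected), the files are handed to the UNIX gc, and the first ref node is
 * switched in.
 */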
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7592) static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7593) unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7595) __s32 __user *fds = (__s32 __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7596) unsigned nr_tables, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7597) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7598) int fd, ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7599) struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7600) struct fixed_file_data *file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7602) if (ctx->file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7603) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7604) if (!nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7605) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7606) if (nr_args > IORING_MAX_FIXED_FILES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7607) return -EMFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7608) if (nr_args > rlimit(RLIMIT_NOFILE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7609) return -EMFILE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7611) file_data = kzalloc(sizeof(*ctx->file_data), GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7612) if (!file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7613) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7614) file_data->ctx = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7615) init_completion(&file_data->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7616) INIT_LIST_HEAD(&file_data->ref_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7617) spin_lock_init(&file_data->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7619) nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7620) file_data->table = kcalloc(nr_tables, sizeof(*file_data->table),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7621) GFP_KERNEL_ACCOUNT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7622) if (!file_data->table)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7623) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7625) if (percpu_ref_init(&file_data->refs, io_file_ref_kill,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7626) PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7627) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7629) if (io_sqe_alloc_file_tables(file_data, nr_tables, nr_args))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7630) goto out_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7631) ctx->file_data = file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7633) for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7634) struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7635) unsigned index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7637) if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7638) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7639) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7641) /* allow sparse sets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7642) if (fd == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7643) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7645) file = fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7646) ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7647) if (!file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7648) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7650) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7651) * Don't allow io_uring instances to be registered. If UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7652) * isn't enabled, then this causes a reference cycle and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7653) * instance can never get freed. If UNIX is enabled we'll
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7654) * handle it just fine, but there's still no point in allowing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7655) * a ring fd as it doesn't support regular read/write anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7656) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7657) if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7658) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7659) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7661) table = &file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7662) index = i & IORING_FILE_TABLE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7663) table->files[index] = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7666) ret = io_sqe_files_scm(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7667) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7668) io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7669) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7672) ref_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7673) if (!ref_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7674) io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7675) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7678) io_sqe_files_set_node(file_data, ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7679) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7680) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7681) for (i = 0; i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7682) file = io_file_from_index(ctx, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7683) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7684) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7686) for (i = 0; i < nr_tables; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7687) kfree(file_data->table[i].files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7688) ctx->nr_user_files = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7689) out_ref:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7690) percpu_ref_exit(&file_data->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7691) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7692) kfree(file_data->table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7693) kfree(file_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7694) ctx->file_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7695) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7698) static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7699) int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7701) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7702) struct sock *sock = ctx->ring_sock->sk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7703) struct sk_buff_head *head = &sock->sk_receive_queue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7704) struct sk_buff *skb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7707) * See if we can merge this file into an existing skb SCM_RIGHTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7708) * file set. If there's no room, fall back to allocating a new skb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7709) * and filling it in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7710) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7711) spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7712) skb = skb_peek(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7713) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7714) struct scm_fp_list *fpl = UNIXCB(skb).fp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7716) if (fpl->count < SCM_MAX_FD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7717) __skb_unlink(skb, head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7718) spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7719) fpl->fp[fpl->count] = get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7720) unix_inflight(fpl->user, fpl->fp[fpl->count]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7721) fpl->count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7722) spin_lock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7723) __skb_queue_head(head, skb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7724) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7725) skb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7728) spin_unlock_irq(&head->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7730) if (skb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7731) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7732) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7735) return __io_sqe_files_scm(ctx, 1, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7736) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7737) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7738) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7740)
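/*
 * Queue a file for deferred release: it is put once the currently active ref
 * node has been killed and its percpu ref drops to zero.
 */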
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7741) static int io_queue_file_removal(struct fixed_file_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7742) struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7744) struct io_file_put *pfile;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7745) struct fixed_file_ref_node *ref_node = data->node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7747) pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7748) if (!pfile)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7749) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7751) pfile->file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7752) list_add(&pfile->list, &ref_node->file_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7754) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7756)
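/*
 * Apply IORING_REGISTER_FILES_UPDATE: for each slot in the given range, queue
 * any existing file for removal and, if the new fd is not -1, install the new
 * file. If anything changed, the old ref node is killed and a fresh one is
 * switched in so that removed files are released once in-flight users finish.
 */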
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7757) static int __io_sqe_files_update(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7758) struct io_uring_files_update *up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7759) unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7760) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7761) struct fixed_file_data *data = ctx->file_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7762) struct fixed_file_ref_node *ref_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7763) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7764) __s32 __user *fds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7765) int fd, i, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7766) __u32 done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7767) bool needs_switch = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7769) if (check_add_overflow(up->offset, nr_args, &done))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7770) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7771) if (done > ctx->nr_user_files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7772) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7774) ref_node = alloc_fixed_file_ref_node(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7775) if (!ref_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7776) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7778) done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7779) fds = u64_to_user_ptr(up->fds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7780) while (nr_args) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7781) struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7782) unsigned index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7784) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7785) if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7786) err = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7787) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7789) i = array_index_nospec(up->offset, ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7790) table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7791) index = i & IORING_FILE_TABLE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7792) if (table->files[index]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7793) file = table->files[index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7794) err = io_queue_file_removal(data, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7795) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7796) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7797) table->files[index] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7798) needs_switch = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7800) if (fd != -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7801) file = fget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7802) if (!file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7803) err = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7804) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7807) * Don't allow io_uring instances to be registered. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7808) * UNIX isn't enabled, then this causes a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7809) * cycle and this instance can never get freed. If UNIX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7810) * is enabled we'll handle it just fine, but there's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7811) * still no point in allowing a ring fd as it doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7812) * support regular read/write anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7813) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7814) if (file->f_op == &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7815) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7816) err = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7817) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7818) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7819) table->files[index] = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7820) err = io_sqe_file_register(ctx, file, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7821) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7822) table->files[index] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7823) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7824) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7827) nr_args--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7828) done++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7829) up->offset++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7832) if (needs_switch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7833) percpu_ref_kill(&data->node->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7834) io_sqe_files_set_node(data, ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7835) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7836) destroy_fixed_file_ref_node(ref_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7838) return done ? done : err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7841) static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7842) unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7844) struct io_uring_files_update up;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7846) if (!ctx->file_data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7847) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7848) if (!nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7849) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7850) if (copy_from_user(&up, arg, sizeof(up)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7851) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7852) if (up.resv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7853) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7855) return __io_sqe_files_update(ctx, &up, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7858) static void io_free_work(struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7859) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7860) struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7862) /* Consider that io_steal_work() relies on this ref */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7863) io_put_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7865)
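/*
 * Set up the io-wq async backend: normally a new pool sized to
 * min(sq_entries, 4 * online CPUs); with IORING_SETUP_ATTACH_WQ the io_wq of
 * the ring referred to by p->wq_fd is shared instead.
 */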
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7866) static int io_init_wq_offload(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7867) struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7869) struct io_wq_data data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7870) struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7871) struct io_ring_ctx *ctx_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7872) unsigned int concurrency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7873) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7875) data.user = ctx->user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7876) data.free_work = io_free_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7877) data.do_work = io_wq_submit_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7879) if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7880) /* Use the queue depth, or 4 * number of online CPUs, whichever is smaller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7881) concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7883) ctx->io_wq = io_wq_create(concurrency, &data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7884) if (IS_ERR(ctx->io_wq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7885) ret = PTR_ERR(ctx->io_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7886) ctx->io_wq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7888) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7891) f = fdget(p->wq_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7892) if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7893) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7895) if (f.file->f_op != &io_uring_fops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7896) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7897) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7900) ctx_attach = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7901) /* @io_wq is protected by holding the fd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7902) if (!io_wq_get(ctx_attach->io_wq, &data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7903) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7904) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7907) ctx->io_wq = ctx_attach->io_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7908) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7909) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7910) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7912)
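/*
 * Allocate the per-task io_uring state (inflight counter, xarray of rings,
 * default identity) and attach it to the task.
 */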
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7913) static int io_uring_alloc_task_context(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7914) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7915) struct io_uring_task *tctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7916) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7918) tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7919) if (unlikely(!tctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7920) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7922) ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7923) if (unlikely(ret)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7924) kfree(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7925) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7928) xa_init(&tctx->xa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7929) init_waitqueue_head(&tctx->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7930) tctx->last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7931) atomic_set(&tctx->in_idle, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7932) tctx->sqpoll = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7933) io_init_identity(&tctx->__identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7934) tctx->identity = &tctx->__identity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7935) task->io_uring = tctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7936) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7939) void __io_uring_free(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7941) struct io_uring_task *tctx = tsk->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7943) WARN_ON_ONCE(!xa_empty(&tctx->xa));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7944) WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7945) if (tctx->identity != &tctx->__identity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7946) kfree(tctx->identity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7947) percpu_counter_destroy(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7948) kfree(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7949) tsk->io_uring = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7951)
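/*
 * Create the submission side offload: for IORING_SETUP_SQPOLL (requires
 * CAP_SYS_ADMIN) get or attach the shared sq_data, add this ctx to its
 * new-ctx list under park, and create the "io_uring-sq" kthread (optionally
 * pinned to p->sq_thread_cpu) if the sq_data does not have one yet. The
 * io-wq backend is set up in all cases.
 */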
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7952) static int io_sq_offload_create(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7953) struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7955) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7957) if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7958) struct io_sq_data *sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7960) ret = -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7961) if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7962) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7964) sqd = io_get_sq_data(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7965) if (IS_ERR(sqd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7966) ret = PTR_ERR(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7967) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7970) ctx->sq_data = sqd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7971) io_sq_thread_park(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7972) mutex_lock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7973) list_add(&ctx->sqd_list, &sqd->ctx_new_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7974) mutex_unlock(&sqd->ctx_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7975) io_sq_thread_unpark(sqd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7977) ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7978) if (!ctx->sq_thread_idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7979) ctx->sq_thread_idle = HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7981) if (sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7982) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7984) if (p->flags & IORING_SETUP_SQ_AFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7985) int cpu = p->sq_thread_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7987) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7988) if (cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7989) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7990) if (!cpu_online(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7991) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7993) sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7994) cpu, "io_uring-sq");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7995) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7996) sqd->thread = kthread_create(io_sq_thread, sqd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7997) "io_uring-sq");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7999) if (IS_ERR(sqd->thread)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8000) ret = PTR_ERR(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8001) sqd->thread = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8002) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8004) ret = io_uring_alloc_task_context(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8005) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8006) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8007) } else if (p->flags & IORING_SETUP_SQ_AFF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8008) /* Can't have SQ_AFF without SQPOLL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8009) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8010) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8013) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8014) ret = io_init_wq_offload(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8015) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8016) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8018) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8019) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8020) io_finish_async(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8021) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8023)
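/*
 * Allow submissions: clear IORING_SETUP_R_DISABLED and wake the SQPOLL
 * thread if this ring uses one.
 */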
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8024) static void io_sq_offload_start(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8026) struct io_sq_data *sqd = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8028) ctx->flags &= ~IORING_SETUP_R_DISABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8029) if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd && sqd->thread)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8030) wake_up_process(sqd->thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8032)
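/*
 * Memory accounting: __io_account_mem()/__io_unaccount_mem() charge pages
 * against the user's RLIMIT_MEMLOCK, while io_account_mem()/io_unaccount_mem()
 * additionally track them in the ring's mm as locked or pinned memory.
 */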
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8033) static inline void __io_unaccount_mem(struct user_struct *user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8034) unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8036) atomic_long_sub(nr_pages, &user->locked_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8037) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8039) static inline int __io_account_mem(struct user_struct *user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8040) unsigned long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8042) unsigned long page_limit, cur_pages, new_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8044) /* Don't allow more pages than we can safely lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8045) page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8047) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8048) cur_pages = atomic_long_read(&user->locked_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8049) new_pages = cur_pages + nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8050) if (new_pages > page_limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8051) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8052) } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8053) new_pages) != cur_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8055) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8058) static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8059) enum io_mem_account acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8060) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8061) if (ctx->limit_mem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8062) __io_unaccount_mem(ctx->user, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8064) if (ctx->mm_account) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8065) if (acct == ACCT_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8066) ctx->mm_account->locked_vm -= nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8067) else if (acct == ACCT_PINNED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8068) atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8072) static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8073) enum io_mem_account acct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8075) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8077) if (ctx->limit_mem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8078) ret = __io_account_mem(ctx->user, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8079) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8080) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8083) if (ctx->mm_account) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8084) if (acct == ACCT_LOCKED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8085) ctx->mm_account->locked_vm += nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8086) else if (acct == ACCT_PINNED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8087) atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8090) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8093) static void io_mem_free(void *ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8095) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8097) if (!ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8098) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8100) page = virt_to_head_page(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8101) if (put_page_testzero(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8102) free_compound_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8105) static void *io_mem_alloc(size_t size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8107) gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8108) __GFP_NORETRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8110) return (void *) __get_free_pages(gfp_flags, get_order(size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8112)
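/*
 * Size of the rings allocation: struct io_rings with its CQE array,
 * followed by the SQ index array. If sq_offset is non-NULL, the offset of
 * the SQ array within the allocation is returned through it. Returns
 * SIZE_MAX if any size calculation overflows.
 */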
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8113) static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8114) size_t *sq_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8116) struct io_rings *rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8117) size_t off, sq_array_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8119) off = struct_size(rings, cqes, cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8120) if (off == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8121) return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8123) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8124) off = ALIGN(off, SMP_CACHE_BYTES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8125) if (off == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8126) return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8127) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8129) if (sq_offset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8130) *sq_offset = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8132) sq_array_size = array_size(sizeof(u32), sq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8133) if (sq_array_size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8134) return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8136) if (check_add_overflow(off, sq_array_size, &off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8137) return SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8139) return off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8141)
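/*
 * Number of pages consumed by the rings and the SQE array, with each
 * allocation rounded up to its page order. Used for (un)accounting the
 * ring memory.
 */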
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8142) static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8144) size_t pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8146) pages = (size_t)1 << get_order(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8147) rings_size(sq_entries, cq_entries, NULL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8148) pages += (size_t)1 << get_order(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8149) array_size(sizeof(struct io_uring_sqe), sq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8151) return pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8153)
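/*
 * Tear down all registered (fixed) buffers: unpin every page, return the
 * accounted memory and free the buffer table.
 */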
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8154) static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8156) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8158) if (!ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8159) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8161) for (i = 0; i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8162) struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8164) for (j = 0; j < imu->nr_bvecs; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8165) unpin_user_page(imu->bvec[j].bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8167) if (imu->acct_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8168) io_unaccount_mem(ctx, imu->acct_pages, ACCT_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8169) kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8170) imu->nr_bvecs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8173) kfree(ctx->user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8174) ctx->user_bufs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8175) ctx->nr_user_bufs = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8176) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8178)
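/*
 * Copy the index'th iovec of the registration array from userspace,
 * converting from the compat layout if the ring was set up by a compat
 * (32-bit) task.
 */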
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8179) static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8180) void __user *arg, unsigned index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8182) struct iovec __user *src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8184) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8185) if (ctx->compat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8186) struct compat_iovec __user *ciovs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8187) struct compat_iovec ciov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8189) ciovs = (struct compat_iovec __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8190) if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8191) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8193) dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8194) dst->iov_len = ciov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8195) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8197) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8198) src = (struct iovec __user *) arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8199) if (copy_from_user(dst, &src[index], sizeof(*dst)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8200) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8201) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8205) * Not super efficient, but this only happens at registration time. And we do cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8206) * the last compound head, so generally we'll only do a full search if we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8207) * match that one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8208) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8209) * We check if the given compound head page has already been accounted, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8210) * avoid double accounting it. This allows us to account the full size of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8211) * page, not just the constituent pages of a huge page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8212) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8213) static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8214) int nr_pages, struct page *hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8216) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8218) /* check current page array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8219) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8220) if (!PageCompound(pages[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8221) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8222) if (compound_head(pages[i]) == hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8223) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8226) /* check previously registered pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8227) for (i = 0; i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8228) struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8230) for (j = 0; j < imu->nr_bvecs; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8231) if (!PageCompound(imu->bvec[j].bv_page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8232) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8233) if (compound_head(imu->bvec[j].bv_page) == hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8234) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8238) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8240)
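/*
 * Work out how many pages of this buffer need accounting: normal pages
 * count one each, while a compound (huge) page is charged once at its
 * full size, and only if it hasn't already been accounted for another
 * buffer. The total is then charged as pinned memory.
 */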
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8241) static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8242) int nr_pages, struct io_mapped_ubuf *imu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8243) struct page **last_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8245) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8247) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8248) if (!PageCompound(pages[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8249) imu->acct_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8250) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8251) struct page *hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8253) hpage = compound_head(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8254) if (hpage == *last_hpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8255) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8256) *last_hpage = hpage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8257) if (headpage_already_acct(ctx, pages, i, hpage))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8258) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8259) imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8263) if (!imu->acct_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8264) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8266) ret = io_account_mem(ctx, imu->acct_pages, ACCT_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8267) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8268) imu->acct_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8269) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8271)
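/*
 * Register fixed buffers (IORING_REGISTER_BUFFERS): for each iovec, pin
 * the user pages long term, reject regular file-backed mappings, account
 * the pinned memory, and build the bio_vec array later used by
 * fixed-buffer reads and writes.
 */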
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8272) static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8273) unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8275) struct vm_area_struct **vmas = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8276) struct page **pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8277) struct page *last_hpage = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8278) int i, j, got_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8279) int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8281) if (ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8282) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8283) if (!nr_args || nr_args > UIO_MAXIOV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8284) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8286) ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8287) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8288) if (!ctx->user_bufs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8289) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8291) for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8292) struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8293) unsigned long off, start, end, ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8294) int pret, nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8295) struct iovec iov;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8296) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8298) ret = io_copy_iov(ctx, &iov, arg, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8299) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8300) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8302) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8303) * Don't impose further limits on the size and buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8304) * constraints here, we'll -EINVAL later when IO is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8305) * submitted if they are wrong.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8306) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8307) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8308) if (!iov.iov_base || !iov.iov_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8309) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8311) /* arbitrary limit, but we need something */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8312) if (iov.iov_len > SZ_1G)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8313) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8315) ubuf = (unsigned long) iov.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8316) end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8317) start = ubuf >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8318) nr_pages = end - start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8320) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8321) if (!pages || nr_pages > got_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8322) kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8323) kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8324) pages = kvmalloc_array(nr_pages, sizeof(struct page *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8325) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8326) vmas = kvmalloc_array(nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8327) sizeof(struct vm_area_struct *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8328) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8329) if (!pages || !vmas) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8330) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8331) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8333) got_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8336) imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8337) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8338) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8339) if (!imu->bvec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8340) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8342) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8343) mmap_read_lock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8344) pret = pin_user_pages(ubuf, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8345) FOLL_WRITE | FOLL_LONGTERM,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8346) pages, vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8347) if (pret == nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8348) /* don't support file-backed memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8349) for (j = 0; j < nr_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8350) struct vm_area_struct *vma = vmas[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8352) if (vma->vm_file &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8353) !is_file_hugepages(vma->vm_file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8354) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8355) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8358) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8359) ret = pret < 0 ? pret : -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8361) mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8362) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8364) * if we did a partial map, or found file-backed vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8365) * release any pages we did get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8366) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8367) if (pret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8368) unpin_user_pages(pages, pret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8369) kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8370) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8373) ret = io_buffer_account_pin(ctx, pages, pret, imu, &last_hpage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8374) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8375) unpin_user_pages(pages, pret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8376) kvfree(imu->bvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8377) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8380) off = ubuf & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8381) size = iov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8382) for (j = 0; j < nr_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8383) size_t vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8385) vec_len = min_t(size_t, size, PAGE_SIZE - off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8386) imu->bvec[j].bv_page = pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8387) imu->bvec[j].bv_len = vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8388) imu->bvec[j].bv_offset = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8389) off = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8390) size -= vec_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8392) /* store original address for later verification */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8393) imu->ubuf = ubuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8394) imu->len = iov.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8395) imu->nr_bvecs = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8397) ctx->nr_user_bufs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8399) kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8400) kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8401) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8402) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8403) kvfree(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8404) kvfree(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8405) io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8406) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8408)
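/*
 * Register an eventfd (IORING_REGISTER_EVENTFD) to be signalled when
 * completion events are posted. Only one eventfd may be registered at a
 * time.
 */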
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8409) static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8410) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8411) __s32 __user *fds = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8412) int fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8414) if (ctx->cq_ev_fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8415) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8417) if (copy_from_user(&fd, fds, sizeof(*fds)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8418) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8420) ctx->cq_ev_fd = eventfd_ctx_fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8421) if (IS_ERR(ctx->cq_ev_fd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8422) int ret = PTR_ERR(ctx->cq_ev_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8423) ctx->cq_ev_fd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8424) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8427) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8430) static int io_eventfd_unregister(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8431) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8432) if (ctx->cq_ev_fd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8433) eventfd_ctx_put(ctx->cq_ev_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8434) ctx->cq_ev_fd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8435) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8438) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8441) static void io_destroy_buffers(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8442) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8443) struct io_buffer *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8444) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8446) xa_for_each(&ctx->io_buffers, index, buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8447) __io_remove_buffers(ctx, buf, index, -1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8449)
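/*
 * Final teardown of the ring context once all references have been
 * dropped: release registered buffers, files and eventfd, put the SQPOLL
 * task and accounted mm, free the ring memory and the context itself.
 */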
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8450) static void io_ring_ctx_free(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8451) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8452) io_finish_async(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8453) io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8455) if (ctx->sqo_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8456) put_task_struct(ctx->sqo_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8457) ctx->sqo_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8458) mmdrop(ctx->mm_account);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8459) ctx->mm_account = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8462) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8463) if (ctx->sqo_blkcg_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8464) css_put(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8465) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8467) io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8468) io_eventfd_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8469) io_destroy_buffers(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8471) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8472) if (ctx->ring_sock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8473) ctx->ring_sock->file = NULL; /* so that iput() is called */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8474) sock_release(ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8476) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8478) io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8479) io_mem_free(ctx->sq_sqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8481) percpu_ref_exit(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8482) free_uid(ctx->user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8483) put_cred(ctx->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8484) kfree(ctx->cancel_hash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8485) kmem_cache_free(req_cachep, ctx->fallback_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8486) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8488)
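/*
 * ->poll() handler for the ring fd: report EPOLLOUT while there is room
 * in the SQ ring, and EPOLLIN when completions (including overflowed
 * ones) are pending.
 */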
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8489) static __poll_t io_uring_poll(struct file *file, poll_table *wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8491) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8492) __poll_t mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8494) poll_wait(file, &ctx->cq_wait, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8495) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8496) * synchronizes with barrier from wq_has_sleeper call in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8497) * io_commit_cqring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8498) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8499) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8500) if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8501) mask |= EPOLLOUT | EPOLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8503) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8504) * Don't flush cqring overflow list here, just do a simple check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8505) * Otherwise there could possibly be an ABBA deadlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8506) * CPU0 CPU1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8507) * ---- ----
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8508) * lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8509) * lock(&ep->mtx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8510) * lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8511) * lock(&ep->mtx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8512) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8513) * Users may get EPOLLIN while seeing nothing in the CQ ring; this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8514) * pushes them to do the flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8516) if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8517) mask |= EPOLLIN | EPOLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8519) return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8522) static int io_uring_fasync(int fd, struct file *file, int on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8523) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8524) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8526) return fasync_helper(fd, file, on, &ctx->cq_fasync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8529) static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8531) struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8533) iod = xa_erase(&ctx->personalities, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8534) if (iod) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8535) put_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8536) if (refcount_dec_and_test(&iod->count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8537) kfree(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8538) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8541) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8544) static void io_ring_exit_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8546) struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8547) exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8549) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8550) * If we're doing polled IO and end up having requests being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8551) * submitted async (out-of-line), then completions can come in while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8552) * we're waiting for refs to drop. We need to reap these manually,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8553) * as nobody else will be looking for them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8555) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8556) io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8557) } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8558) io_ring_ctx_free(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8561) static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8563) struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8565) return req->ctx == data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8567)
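/*
 * Start tearing the ring down: kill the percpu refs so no new requests
 * get in, flush overflowed CQEs, cancel timeouts, poll requests and io-wq
 * work, drop the ring memory accounting, and queue exit_work to free the
 * context once the remaining references drain.
 */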
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8568) static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8570) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8571) struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8573) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8574) percpu_ref_kill(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8575) /* if force is set, the ring is going away. always drop after that */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8577) if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8578) ctx->sqo_dead = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8580) ctx->cq_overflow_flushed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8581) if (ctx->rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8582) __io_cqring_overflow_flush(ctx, true, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8583) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8585) io_kill_timeouts(ctx, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8586) io_poll_remove_all(ctx, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8588) if (ctx->io_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8589) io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8591) /* if we failed setting up the ctx, we might not have any rings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8592) io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8593) xa_for_each(&ctx->personalities, index, iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8594) io_unregister_personality(ctx, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8596) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8597) * Do this upfront, so we won't have a grace period where the ring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8598) * is closed but resources aren't reaped yet. This can cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8599) * spurious failures when setting up a new ring.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8600) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8601) io_unaccount_mem(ctx, ring_pages(ctx->sq_entries, ctx->cq_entries),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8602) ACCT_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8604) INIT_WORK(&ctx->exit_work, io_ring_exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8605) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8606) * Use system_unbound_wq to avoid spawning tons of event kworkers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8607) * if we're exiting a ton of rings at the same time. It just adds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8608) * noise and overhead, and there's no discernible change in runtime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8609) * over using system_wq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8610) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8611) queue_work(system_unbound_wq, &ctx->exit_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8614) static int io_uring_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8616) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8618) file->private_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8619) io_ring_ctx_wait_and_kill(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8620) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8623) struct io_task_cancel {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8624) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8625) struct files_struct *files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8626) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8628) static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8630) struct io_kiocb *req = container_of(work, struct io_kiocb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8631) struct io_task_cancel *cancel = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8632) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8634) if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8635) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8636) struct io_ring_ctx *ctx = req->ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8638) /* protect against races with linked timeouts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8639) spin_lock_irqsave(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8640) ret = io_match_task(req, cancel->task, cancel->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8641) spin_unlock_irqrestore(&ctx->completion_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8642) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8643) ret = io_match_task(req, cancel->task, cancel->files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8645) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8648) static void io_cancel_defer_files(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8649) struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8650) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8652) struct io_defer_entry *de = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8653) LIST_HEAD(list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8655) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8656) list_for_each_entry_reverse(de, &ctx->defer_list, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8657) if (io_match_task(de->req, task, files)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8658) list_cut_position(&list, &ctx->defer_list, &de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8659) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8662) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8664) while (!list_empty(&list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8665) de = list_first_entry(&list, struct io_defer_entry, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8666) list_del_init(&de->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8667) req_set_fail_links(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8668) io_put_req(de->req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8669) io_req_complete(de->req, -ECANCELED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8670) kfree(de);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8674) static int io_uring_count_inflight(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8675) struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8676) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8678) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8679) int cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8681) spin_lock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8682) list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8683) cnt += io_match_task(req, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8684) spin_unlock_irq(&ctx->inflight_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8685) return cnt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8688) static void io_uring_cancel_files(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8689) struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8690) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8692) while (!list_empty_careful(&ctx->inflight_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8693) struct io_task_cancel cancel = { .task = task, .files = files };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8694) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8695) int inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8697) inflight = io_uring_count_inflight(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8698) if (!inflight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8699) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8701) io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8702) io_poll_remove_all(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8703) io_kill_timeouts(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8704) /* cancellations _may_ trigger task work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8705) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8707) prepare_to_wait(&task->io_uring->wait, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8708) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8709) if (inflight == io_uring_count_inflight(ctx, task, files))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8710) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8711) finish_wait(&task->io_uring->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8715) static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8716) struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8718) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8719) struct io_task_cancel cancel = { .task = task, .files = NULL, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8720) enum io_wq_cancel cret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8721) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8723) cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8724) if (cret != IO_WQ_CANCEL_NOTFOUND)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8725) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8727) /* SQPOLL thread does its own polling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8728) if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8729) while (!list_empty_careful(&ctx->iopoll_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8730) io_iopoll_try_reap_events(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8731) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8735) ret |= io_poll_remove_all(ctx, task, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8736) ret |= io_kill_timeouts(ctx, task, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8737) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8738) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8739) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8740) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8744) static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8745) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8746) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8747) ctx->sqo_dead = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8748) if (ctx->flags & IORING_SETUP_R_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8749) io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8750) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8752) /* make sure callers enter the ring to get error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8753) if (ctx->rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8754) io_ring_set_wakeup_flag(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8758) * We need to iteratively cancel requests, in case a request has dependent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8759) * hard links. These persist even when a cancelation fails, hence keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8760) * looping until none are found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8761) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8762) static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8763) struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8765) struct task_struct *task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8767) if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8768) io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8769) task = ctx->sq_data->thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8770) atomic_inc(&task->io_uring->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8771) io_sq_thread_park(ctx->sq_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8774) io_cancel_defer_files(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8775) io_cqring_overflow_flush(ctx, true, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8777) if (!files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8778) __io_uring_cancel_task_requests(ctx, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8779) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8780) io_uring_cancel_files(ctx, task, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8782) if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8783) atomic_dec(&task->io_uring->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8784) io_sq_thread_unpark(ctx->sq_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8788) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8789) * Note that this task has used io_uring. We use it for cancelation purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8790) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8791) static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8792) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8793) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8794) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8796) if (unlikely(!tctx)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8797) ret = io_uring_alloc_task_context(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8798) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8799) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8800) tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8802) if (tctx->last != file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8803) void *old = xa_load(&tctx->xa, (unsigned long)file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8805) if (!old) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8806) get_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8807) ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8808) file, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8809) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8810) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8811) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8814) tctx->last = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8817) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8818) * This is race safe because the task itself is doing this, hence it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8819) * cannot be going through the exit/cancel paths at the same time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8820) * The flag cannot be modified while exit/cancel is running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8822) if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8823) tctx->sqpoll = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8825) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8829) * Remove this io_uring_file -> task mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8830) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8831) static void io_uring_del_task_file(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8833) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8835) if (tctx->last == file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8836) tctx->last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8837) file = xa_erase(&tctx->xa, (unsigned long)file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8838) if (file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8839) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8842) static void io_uring_remove_task_files(struct io_uring_task *tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8844) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8845) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8847) xa_for_each(&tctx->xa, index, file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8848) io_uring_del_task_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8851) void __io_uring_files_cancel(struct files_struct *files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8853) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8854) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8855) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8857) /* make sure overflow events are dropped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8858) atomic_inc(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8859) xa_for_each(&tctx->xa, index, file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8860) io_uring_cancel_task_requests(file->private_data, files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8861) atomic_dec(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8863) if (files)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8864) io_uring_remove_task_files(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8866)
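/*
 * Number of requests this task currently has inflight. If it has used
 * SQPOLL rings, the inflight count tracked for those rings is added as
 * well, since submission happens out of line there.
 */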
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8867) static s64 tctx_inflight(struct io_uring_task *tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8869) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8870) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8871) s64 inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8873) inflight = percpu_counter_sum(&tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8874) if (!tctx->sqpoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8875) return inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8877) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8878) * If we have SQPOLL rings, then we need to iterate and find them, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8879) * add the pending count for those.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8880) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8881) xa_for_each(&tctx->xa, index, file) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8882) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8884) if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8885) struct io_uring_task *__tctx = ctx->sqo_task->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8887) inflight += percpu_counter_sum(&__tctx->inflight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8891) return inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8894) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8895) * Find any io_uring fd that this task has registered or done IO on, and cancel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8896) * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8897) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8898) void __io_uring_task_cancel(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8900) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8901) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8902) s64 inflight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8904) /* make sure overflow events are dropped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8905) atomic_inc(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8907) /* trigger io_disable_sqo_submit() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8908) if (tctx->sqpoll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8909) __io_uring_files_cancel(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8911) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8912) /* read completions before cancelations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8913) inflight = tctx_inflight(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8914) if (!inflight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8915) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8916) __io_uring_files_cancel(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8918) prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8920) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8921) * If we've seen completions, retry without waiting. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8922) * avoids a race where a completion comes in before we did
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8923) * prepare_to_wait().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8925) if (inflight == tctx_inflight(tctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8926) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8927) finish_wait(&tctx->wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8928) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8930) atomic_dec(&tctx->in_idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8932) io_uring_remove_task_files(tctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8934)
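/*
 * Called through ->flush() on each close() of an io_uring fd. If this looks
 * like the final user reference (f_count == 2: the pending fput() plus our
 * potential task file note), drop this task's file note and, for SQPOLL
 * rings, stop accepting new submissions.
 */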
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8935) static int io_uring_flush(struct file *file, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8937) struct io_uring_task *tctx = current->io_uring;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8938) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8940) if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8941) io_uring_cancel_task_requests(ctx, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8943) if (!tctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8944) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8946) /* we should have cancelled and erased it before PF_EXITING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8947) WARN_ON_ONCE((current->flags & PF_EXITING) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8948) xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8950) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8951) * An fput() is pending; f_count will be 2 if the only other reference is our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8952) * potential task file note. If the task is exiting, drop regardless of count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8953) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8954) if (atomic_long_read(&file->f_count) != 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8955) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8957) if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8958) /* there is only one file note, which is owned by sqo_task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8959) WARN_ON_ONCE(ctx->sqo_task != current &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8960) xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8961) /* sqo_dead check is for when this happens after cancellation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8962) WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8963) !xa_load(&tctx->xa, (unsigned long)file));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8965) io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8968) if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8969) io_uring_del_task_file(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8970) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8973) static void *io_uring_validate_mmap_request(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8974) loff_t pgoff, size_t sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8976) struct io_ring_ctx *ctx = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8977) loff_t offset = pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8978) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8979) void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8981) switch (offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8982) case IORING_OFF_SQ_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8983) case IORING_OFF_CQ_RING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8984) ptr = ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8986) case IORING_OFF_SQES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8987) ptr = ctx->sq_sqes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8988) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8989) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8990) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8993) page = virt_to_head_page(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8994) if (sz > page_size(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8995) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8997) return ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8999)
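/*
 * Illustrative userspace side (not part of this file): the application maps
 * the rings through the well-known offsets validated above, e.g.:
 *
 *	sq_ptr = mmap(NULL, sq_ring_sz, PROT_READ | PROT_WRITE,
 *		      MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING);
 *	sqes   = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
 *		      MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQES);
 *
 * where the sizes are computed from the offsets returned in struct
 * io_uring_params; see the liburing examples for the canonical setup code.
 */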
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9000) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9002) static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9004) size_t sz = vma->vm_end - vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9005) unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9006) void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9008) ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9009) if (IS_ERR(ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9010) return PTR_ERR(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9012) pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9013) return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9016) #else /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9018) static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9019) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9020) return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9023) static unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9025) return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9028) static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9029) unsigned long addr, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9030) unsigned long pgoff, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9032) void *ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9034) ptr = io_uring_validate_mmap_request(file, pgoff, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9035) if (IS_ERR(ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9036) return PTR_ERR(ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9038) return (unsigned long) ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9041) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9042)
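/*
 * Used for IORING_ENTER_SQ_WAIT: sleep until the SQ ring has free space
 * again (the SQPOLL thread consumes entries), or until a signal arrives or
 * the SQPOLL side is marked dead.
 */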
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9043) static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9045) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9046) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9048) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9049) if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9050) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9052) prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9054) if (unlikely(ctx->sqo_dead)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9055) ret = -EOWNERDEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9056) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9059) if (!io_sqring_full(ctx))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9060) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9062) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9063) } while (!signal_pending(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9065) finish_wait(&ctx->sqo_sq_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9066) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9067) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9069)
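/*
 * Illustrative call from userspace (not part of this file): submit the SQEs
 * queued so far and wait for at least one completion in a single syscall,
 * e.g.:
 *
 *	syscall(__NR_io_uring_enter, ring_fd, to_submit, 1,
 *		IORING_ENTER_GETEVENTS, NULL, 0);
 *
 * With IORING_SETUP_SQPOLL the submit side is handled by the kernel thread,
 * and the call is mainly used to wake it (IORING_ENTER_SQ_WAKEUP) or wait
 * for ring space (IORING_ENTER_SQ_WAIT).
 */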
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9070) SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9071) u32, min_complete, u32, flags, const sigset_t __user *, sig,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9072) size_t, sigsz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9074) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9075) long ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9076) int submitted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9077) struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9079) io_run_task_work();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9081) if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9082) IORING_ENTER_SQ_WAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9083) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9085) f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9086) if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9087) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9089) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9090) if (f.file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9091) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9093) ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9094) ctx = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9095) if (!percpu_ref_tryget(&ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9096) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9098) ret = -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9099) if (ctx->flags & IORING_SETUP_R_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9100) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9102) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9103) * For SQ polling, the thread will do all submissions and completions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9104) * Just return the requested submit count, and wake the thread if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9105) * we were asked to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9107) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9108) if (ctx->flags & IORING_SETUP_SQPOLL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9109) io_cqring_overflow_flush(ctx, false, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9111) if (unlikely(ctx->sqo_dead)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9112) ret = -EOWNERDEAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9113) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9115) if (flags & IORING_ENTER_SQ_WAKEUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9116) wake_up(&ctx->sq_data->wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9117) if (flags & IORING_ENTER_SQ_WAIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9118) ret = io_sqpoll_wait_sq(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9119) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9120) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9122) submitted = to_submit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9123) } else if (to_submit) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9124) ret = io_uring_add_task_file(ctx, f.file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9125) if (unlikely(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9126) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9127) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9128) submitted = io_submit_sqes(ctx, to_submit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9129) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9131) if (submitted != to_submit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9132) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9134) if (flags & IORING_ENTER_GETEVENTS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9135) min_complete = min(min_complete, ctx->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9137) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9138) * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, userspace
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9139) * does not need to poll for IO completion events itself; it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9140) * rely on io_sq_thread to do that polling work, which reduces CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9141) * usage and uring_lock contention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9142) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9143) if (ctx->flags & IORING_SETUP_IOPOLL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9144) !(ctx->flags & IORING_SETUP_SQPOLL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9145) ret = io_iopoll_check(ctx, min_complete);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9146) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9147) ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9151) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9152) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9153) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9154) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9155) return submitted ? submitted : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9158) #ifdef CONFIG_PROC_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9159) static int io_uring_show_cred(struct seq_file *m, unsigned int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9160) const struct io_identity *iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9162) const struct cred *cred = iod->creds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9163) struct user_namespace *uns = seq_user_ns(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9164) struct group_info *gi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9165) kernel_cap_t cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9166) unsigned __capi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9167) int g;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9169) seq_printf(m, "%5d\n", id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9170) seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9171) seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9172) seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9173) seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9174) seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9175) seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9176) seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9177) seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9178) seq_puts(m, "\n\tGroups:\t");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9179) gi = cred->group_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9180) for (g = 0; g < gi->ngroups; g++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9181) seq_put_decimal_ull(m, g ? " " : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9182) from_kgid_munged(uns, gi->gid[g]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9184) seq_puts(m, "\n\tCapEff:\t");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9185) cap = cred->cap_effective;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9186) CAP_FOR_EACH_U32(__capi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9187) seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9188) seq_putc(m, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9189) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9192) static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9194) struct io_sq_data *sq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9195) bool has_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9196) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9198) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9199) * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9200) * since fdinfo case grabs it in the opposite direction of normal use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9201) * cases. If we fail to get the lock, we just don't iterate any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9202) * structures that could be going away outside the io_uring mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9203) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9204) has_lock = mutex_trylock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9206) if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9207) sq = ctx->sq_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9209) seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9210) seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9211) seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9212) for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9213) struct fixed_file_table *table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9214) struct file *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9216) table = &ctx->file_data->table[i >> IORING_FILE_TABLE_SHIFT];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9217) f = table->files[i & IORING_FILE_TABLE_MASK];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9218) if (f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9219) seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9220) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9221) seq_printf(m, "%5u: <none>\n", i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9223) seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9224) for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9225) struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9227) seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9228) (unsigned int) buf->len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9230) if (has_lock && !xa_empty(&ctx->personalities)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9231) unsigned long index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9232) const struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9234) seq_printf(m, "Personalities:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9235) xa_for_each(&ctx->personalities, index, iod)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9236) io_uring_show_cred(m, index, iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9238) seq_printf(m, "PollList:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9239) spin_lock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9240) for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9241) struct hlist_head *list = &ctx->cancel_hash[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9242) struct io_kiocb *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9244) hlist_for_each_entry(req, list, hash_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9245) seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9246) req->task->task_works != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9248) spin_unlock_irq(&ctx->completion_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9249) if (has_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9250) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9253) static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9255) struct io_ring_ctx *ctx = f->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9257) if (percpu_ref_tryget(&ctx->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9258) __io_uring_show_fdinfo(ctx, m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9259) percpu_ref_put(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9262) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9263)
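/* File operations backing the anonymous "[io_uring]" fd returned by setup. */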
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9264) static const struct file_operations io_uring_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9265) .release = io_uring_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9266) .flush = io_uring_flush,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9267) .mmap = io_uring_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9268) #ifndef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9269) .get_unmapped_area = io_uring_nommu_get_unmapped_area,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9270) .mmap_capabilities = io_uring_nommu_mmap_capabilities,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9271) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9272) .poll = io_uring_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9273) .fasync = io_uring_fasync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9274) #ifdef CONFIG_PROC_FS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9275) .show_fdinfo = io_uring_show_fdinfo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9276) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9277) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9278)
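/*
 * Allocate the memory the application will mmap: one region holding struct
 * io_rings (SQ/CQ headers and the CQE array) with the SQ index array placed
 * at sq_array_offset behind it, plus a separate region for the SQEs.
 */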
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9279) static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9280) struct io_uring_params *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9281) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9282) struct io_rings *rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9283) size_t size, sq_array_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9285) /* make sure these are sane, as we already accounted them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9286) ctx->sq_entries = p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9287) ctx->cq_entries = p->cq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9289) size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9290) if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9291) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9293) rings = io_mem_alloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9294) if (!rings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9295) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9297) ctx->rings = rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9298) ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9299) rings->sq_ring_mask = p->sq_entries - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9300) rings->cq_ring_mask = p->cq_entries - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9301) rings->sq_ring_entries = p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9302) rings->cq_ring_entries = p->cq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9303) ctx->sq_mask = rings->sq_ring_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9304) ctx->cq_mask = rings->cq_ring_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9306) size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9307) if (size == SIZE_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9308) io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9309) ctx->rings = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9310) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9313) ctx->sq_sqes = io_mem_alloc(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9314) if (!ctx->sq_sqes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9315) io_mem_free(ctx->rings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9316) ctx->rings = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9317) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9320) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9323) static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9325) int ret, fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9327) fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9328) if (fd < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9329) return fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9331) ret = io_uring_add_task_file(ctx, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9332) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9333) put_unused_fd(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9334) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9336) fd_install(fd, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9337) return fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9340) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9341) * Allocate an anonymous file; this is what constitutes the application
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9342) * visible backing of an io_uring instance. The application mmaps this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9343) * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9344) * we have to tie this fd to a socket for file garbage collection purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9346) static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9348) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9349) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9350) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9352) ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9353) &ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9354) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9355) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9356) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9358) file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9359) O_RDWR | O_CLOEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9360) #if defined(CONFIG_UNIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9361) if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9362) sock_release(ctx->ring_sock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9363) ctx->ring_sock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9364) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9365) ctx->ring_sock->file = file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9367) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9368) return file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9371) static int io_uring_create(unsigned entries, struct io_uring_params *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9372) struct io_uring_params __user *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9374) struct user_struct *user = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9375) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9376) struct file *file;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9377) bool limit_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9378) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9380) if (!entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9381) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9382) if (entries > IORING_MAX_ENTRIES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9383) if (!(p->flags & IORING_SETUP_CLAMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9384) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9385) entries = IORING_MAX_ENTRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9388) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9389) * Use twice as many entries for the CQ ring. It's possible for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9390) * application to drive a higher depth than the size of the SQ ring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9391) * since the sqes are only used at submission time. This allows for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9392) * some flexibility in overcommitting a bit. If the application has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9393) * set IORING_SETUP_CQSIZE, it will have passed in the desired number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9394) * of CQ ring entries manually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9395) */
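/*
 * For example (illustrative): entries == 100 rounds the SQ ring up to 128
 * entries, and without IORING_SETUP_CQSIZE the CQ ring then gets 256.
 */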
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9396) p->sq_entries = roundup_pow_of_two(entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9397) if (p->flags & IORING_SETUP_CQSIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9398) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9399) * If IORING_SETUP_CQSIZE is set, we do the same roundup to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9400) * power-of-two, if it isn't already. Other than requiring at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9401) * as many CQ entries as SQ entries, we impose no cq vs sq sizing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9402) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9403) if (!p->cq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9404) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9405) if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9406) if (!(p->flags & IORING_SETUP_CLAMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9407) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9408) p->cq_entries = IORING_MAX_CQ_ENTRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9410) p->cq_entries = roundup_pow_of_two(p->cq_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9411) if (p->cq_entries < p->sq_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9412) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9413) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9414) p->cq_entries = 2 * p->sq_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9417) user = get_uid(current_user());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9418) limit_mem = !capable(CAP_IPC_LOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9420) if (limit_mem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9421) ret = __io_account_mem(user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9422) ring_pages(p->sq_entries, p->cq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9423) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9424) free_uid(user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9425) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9429) ctx = io_ring_ctx_alloc(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9430) if (!ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9431) if (limit_mem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9432) __io_unaccount_mem(user, ring_pages(p->sq_entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9433) p->cq_entries));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9434) free_uid(user);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9435) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9437) ctx->compat = in_compat_syscall();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9438) ctx->user = user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9439) ctx->creds = get_current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9440) #ifdef CONFIG_AUDIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9441) ctx->loginuid = current->loginuid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9442) ctx->sessionid = current->sessionid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9443) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9444) ctx->sqo_task = get_task_struct(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9446) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9447) * This is just grabbed for accounting purposes. When a process exits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9448) * the mm is exited and dropped before the files, hence we need to hang
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9449) * on to this mm purely for the purposes of being able to unaccount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9450) * memory (locked/pinned vm). It's not used for anything else.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9452) mmgrab(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9453) ctx->mm_account = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9455) #ifdef CONFIG_BLK_CGROUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9456) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9457) * The sq thread will belong to the original cgroup it was inited in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9458) * If the cgroup goes offline (e.g. disabling the io controller), then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9459) * issued bios will be associated with the closest cgroup later in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9460) * block layer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9462) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9463) ctx->sqo_blkcg_css = blkcg_css();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9464) ret = css_tryget_online(ctx->sqo_blkcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9465) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9466) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9467) /* don't init against a dying cgroup, have the user try again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9468) ctx->sqo_blkcg_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9469) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9470) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9471) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9472) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9475) * Account memory _before_ installing the file descriptor. Once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9476) * the descriptor is installed, it can get closed at any time. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9477) * do this before hitting the general error path, as ring freeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9478) * will un-account as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9479) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9480) io_account_mem(ctx, ring_pages(p->sq_entries, p->cq_entries),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9481) ACCT_LOCKED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9482) ctx->limit_mem = limit_mem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9484) ret = io_allocate_scq_urings(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9485) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9486) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9488) ret = io_sq_offload_create(ctx, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9489) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9490) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9492) if (!(p->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9493) io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9495) memset(&p->sq_off, 0, sizeof(p->sq_off));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9496) p->sq_off.head = offsetof(struct io_rings, sq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9497) p->sq_off.tail = offsetof(struct io_rings, sq.tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9498) p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9499) p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9500) p->sq_off.flags = offsetof(struct io_rings, sq_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9501) p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9502) p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9504) memset(&p->cq_off, 0, sizeof(p->cq_off));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9505) p->cq_off.head = offsetof(struct io_rings, cq.head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9506) p->cq_off.tail = offsetof(struct io_rings, cq.tail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9507) p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9508) p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9509) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9510) p->cq_off.cqes = offsetof(struct io_rings, cqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9511) p->cq_off.flags = offsetof(struct io_rings, cq_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9513) p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9514) IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9515) IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9516) IORING_FEAT_POLL_32BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9518) if (copy_to_user(params, p, sizeof(*p))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9519) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9520) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9523) file = io_uring_get_file(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9524) if (IS_ERR(file)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9525) ret = PTR_ERR(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9526) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9530) * Install ring fd as the very last thing, so we don't risk someone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9531) * having closed it before we finish setup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9532) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9533) ret = io_uring_install_fd(ctx, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9534) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9535) io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9536) /* fput will clean it up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9537) fput(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9538) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9541) trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9542) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9543) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9544) io_disable_sqo_submit(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9545) io_ring_ctx_wait_and_kill(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9546) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9549) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9550) * Sets up an io_uring context and returns the fd. The application asks for a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9551) * ring size; we return the actual sq/cq ring sizes (among other things) in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9552) * params structure passed in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9553) */
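/*
 * Illustrative userspace call (not part of this file):
 *
 *	struct io_uring_params p = { 0 };
 *	int ring_fd = syscall(__NR_io_uring_setup, 256, &p);
 *
 * On success the returned fd is mmap'ed using p.sq_off/p.cq_off to locate
 * the rings; see the liburing examples for the canonical version.
 */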
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9554) static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9556) struct io_uring_params p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9557) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9559) if (copy_from_user(&p, params, sizeof(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9560) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9561) for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9562) if (p.resv[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9563) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9566) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9567) IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9568) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9569) IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9570) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9572) return io_uring_create(entries, &p, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9575) SYSCALL_DEFINE2(io_uring_setup, u32, entries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9576) struct io_uring_params __user *, params)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9577) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9578) return io_uring_setup(entries, params);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9580)
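/*
 * IORING_REGISTER_PROBE: report which opcodes this kernel build supports by
 * filling the caller's io_uring_probe array with IO_URING_OP_SUPPORTED flags.
 */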
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9581) static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9582) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9583) struct io_uring_probe *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9584) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9585) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9587) size = struct_size(p, ops, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9588) if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9589) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9590) p = kzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9591) if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9592) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9594) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9595) if (copy_from_user(p, arg, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9596) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9597) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9598) if (memchr_inv(p, 0, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9599) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9601) p->last_op = IORING_OP_LAST - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9602) if (nr_args > IORING_OP_LAST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9603) nr_args = IORING_OP_LAST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9605) for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9606) p->ops[i].op = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9607) if (!io_op_defs[i].not_supported)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9608) p->ops[i].flags = IO_URING_OP_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9610) p->ops_len = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9612) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9613) if (copy_to_user(arg, p, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9614) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9615) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9616) kfree(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9617) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9619)
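/*
 * IORING_REGISTER_PERSONALITY: stash the current task's credentials under a
 * new id, which submissions can later select through sqe->personality.
 */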
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9620) static int io_register_personality(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9622) struct io_identity *iod;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9623) u32 id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9624) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9626) iod = kmalloc(sizeof(*iod), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9627) if (unlikely(!iod))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9628) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9630) io_init_identity(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9631) iod->creds = get_current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9633) ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)iod,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9634) XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9635) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9636) put_cred(iod->creds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9637) kfree(iod);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9638) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9640) return id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9642)
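/*
 * IORING_REGISTER_RESTRICTIONS: while the ring is still R_DISABLED, record
 * which register opcodes, SQE opcodes and SQE flags will be allowed once the
 * ring is enabled. May only be done once per ring.
 */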
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9643) static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9644) unsigned int nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9645) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9646) struct io_uring_restriction *res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9647) size_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9648) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9650) /* Restrictions allowed only if rings started disabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9651) if (!(ctx->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9652) return -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9654) /* We allow only a single restrictions registration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9655) if (ctx->restrictions.registered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9656) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9658) if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9659) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9661) size = array_size(nr_args, sizeof(*res));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9662) if (size == SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9663) return -EOVERFLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9665) res = memdup_user(arg, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9666) if (IS_ERR(res))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9667) return PTR_ERR(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9669) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9671) for (i = 0; i < nr_args; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9672) switch (res[i].opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9673) case IORING_RESTRICTION_REGISTER_OP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9674) if (res[i].register_op >= IORING_REGISTER_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9675) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9676) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9677) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9679) __set_bit(res[i].register_op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9680) ctx->restrictions.register_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9681) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9682) case IORING_RESTRICTION_SQE_OP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9683) if (res[i].sqe_op >= IORING_OP_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9684) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9685) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9688) __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9689) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9690) case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9691) ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9692) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9693) case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9694) ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9695) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9696) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9697) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9698) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9702) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9703) /* Reset all restrictions if an error happened */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9704) if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9705) memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9706) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9707) ctx->restrictions.registered = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9709) kfree(res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9710) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9713) static int io_register_enable_rings(struct io_ring_ctx *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9715) if (!(ctx->flags & IORING_SETUP_R_DISABLED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9716) return -EBADFD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9718) if (ctx->restrictions.registered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9719) ctx->restricted = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9721) io_sq_offload_start(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9722) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9725) static bool io_register_op_must_quiesce(int op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9727) switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9728) case IORING_UNREGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9729) case IORING_REGISTER_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9730) case IORING_REGISTER_PROBE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9731) case IORING_REGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9732) case IORING_UNREGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9733) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9734) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9735) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9738)
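/*
 * Runs with ctx->uring_lock held. For opcodes that need quiescing, the
 * percpu ref is killed and the lock is dropped while waiting for
 * in-flight requests to finish; on the success path the ref is brought
 * back to life at the out label once the operation completes.
 */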
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9739) static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9740) void __user *arg, unsigned nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9741) __releases(ctx->uring_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9742) __acquires(ctx->uring_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9744) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9747) * We're inside the ring mutex; if the ref is already dying, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9748) * someone else killed the ctx or is already going through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9749) * io_uring_register().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9751) if (percpu_ref_is_dying(&ctx->refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9752) return -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9754) if (io_register_op_must_quiesce(opcode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9755) percpu_ref_kill(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9758) * Drop uring mutex before waiting for references to exit. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9759) * another thread is currently inside io_uring_enter() it might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9760) * need to grab the uring_lock to make progress. If we hold it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9761) * here across the drain wait, then we can deadlock. It's safe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9762) * to drop the mutex here, since no new references will come in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9763) * after we've killed the percpu ref.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9765) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9766) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9767) ret = wait_for_completion_interruptible(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9768) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9769) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9770) ret = io_run_task_work_sig();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9771) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9772) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9773) } while (1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9774) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9776) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9777) io_refs_resurrect(&ctx->refs, &ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9778) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9782) if (ctx->restricted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9783) if (opcode >= IORING_REGISTER_LAST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9784) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9785) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9788) if (!test_bit(opcode, ctx->restrictions.register_op)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9789) ret = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9790) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9794) switch (opcode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9795) case IORING_REGISTER_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9796) ret = io_sqe_buffer_register(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9797) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9798) case IORING_UNREGISTER_BUFFERS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9799) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9800) if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9801) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9802) ret = io_sqe_buffer_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9803) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9804) case IORING_REGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9805) ret = io_sqe_files_register(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9806) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9807) case IORING_UNREGISTER_FILES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9808) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9809) if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9810) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9811) ret = io_sqe_files_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9812) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9813) case IORING_REGISTER_FILES_UPDATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9814) ret = io_sqe_files_update(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9815) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9816) case IORING_REGISTER_EVENTFD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9817) case IORING_REGISTER_EVENTFD_ASYNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9818) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9819) if (nr_args != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9820) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9821) ret = io_eventfd_register(ctx, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9822) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9823) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9824) if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9825) ctx->eventfd_async = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9826) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9827) ctx->eventfd_async = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9828) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9829) case IORING_UNREGISTER_EVENTFD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9830) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9831) if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9832) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9833) ret = io_eventfd_unregister(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9834) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9835) case IORING_REGISTER_PROBE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9836) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9837) if (!arg || nr_args > 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9838) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9839) ret = io_probe(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9840) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9841) case IORING_REGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9842) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9843) if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9844) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9845) ret = io_register_personality(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9846) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9847) case IORING_UNREGISTER_PERSONALITY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9848) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9849) if (arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9850) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9851) ret = io_unregister_personality(ctx, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9852) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9853) case IORING_REGISTER_ENABLE_RINGS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9854) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9855) if (arg || nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9856) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9857) ret = io_register_enable_rings(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9858) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9859) case IORING_REGISTER_RESTRICTIONS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9860) ret = io_register_restrictions(ctx, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9861) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9862) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9863) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9864) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9867) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9868) if (io_register_op_must_quiesce(opcode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9869) /* bring the ctx back to life */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9870) percpu_ref_reinit(&ctx->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9871) reinit_completion(&ctx->ref_comp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9873) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9876) SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9877) void __user *, arg, unsigned int, nr_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9879) struct io_ring_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9880) long ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9881) struct fd f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9883) f = fdget(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9884) if (!f.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9885) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9887) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9888) if (f.file->f_op != &io_uring_fops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9889) goto out_fput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9891) ctx = f.file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9893) mutex_lock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9894) ret = __io_uring_register(ctx, opcode, arg, nr_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9895) mutex_unlock(&ctx->uring_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9896) trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9897) ctx->cq_ev_fd != NULL, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9898) out_fput:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9899) fdput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9900) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9901) }
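
/*
 * Illustrative sketch (userspace, not built here): registering an
 * eventfd so completions can be waited on with poll/epoll. A raw
 * syscall is shown; liburing's io_uring_register_eventfd() wraps the
 * same opcode. ring_fd and error handling are assumed; nr_args must be
 * 1 and arg points at the eventfd descriptor.
 *
 *	int efd = eventfd(0, 0);
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_EVENTFD, &efd, 1);
 */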
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9902)
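/*
 * Compile-time sanity checks on the UAPI SQE layout (field offsets and
 * sizes must never change), followed by creation of the request slab
 * cache.
 */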
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9903) static int __init io_uring_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9905) #define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9906) BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9907) BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9908) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9910) #define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9911) __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9912) BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9913) BUILD_BUG_SQE_ELEM(0, __u8, opcode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9914) BUILD_BUG_SQE_ELEM(1, __u8, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9915) BUILD_BUG_SQE_ELEM(2, __u16, ioprio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9916) BUILD_BUG_SQE_ELEM(4, __s32, fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9917) BUILD_BUG_SQE_ELEM(8, __u64, off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9918) BUILD_BUG_SQE_ELEM(8, __u64, addr2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9919) BUILD_BUG_SQE_ELEM(16, __u64, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9920) BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9921) BUILD_BUG_SQE_ELEM(24, __u32, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9922) BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9923) BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9924) BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9925) BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9926) BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9927) BUILD_BUG_SQE_ELEM(28, __u32, poll32_events);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9928) BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9929) BUILD_BUG_SQE_ELEM(28, __u32, msg_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9930) BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9931) BUILD_BUG_SQE_ELEM(28, __u32, accept_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9932) BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9933) BUILD_BUG_SQE_ELEM(28, __u32, open_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9934) BUILD_BUG_SQE_ELEM(28, __u32, statx_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9935) BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9936) BUILD_BUG_SQE_ELEM(28, __u32, splice_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9937) BUILD_BUG_SQE_ELEM(32, __u64, user_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9938) BUILD_BUG_SQE_ELEM(40, __u16, buf_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9939) BUILD_BUG_SQE_ELEM(42, __u16, personality);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9940) BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9942) BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9943) BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9944) req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9945) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9947) __initcall(io_uring_init);